专业编程基础技术教程

网站首页 > 基础教程 正文

通过Monk AI轻松进行对象检测(monitor对象存在哪里)

ccvgpt 2024-07-28 12:18:51 基础教程 9 ℃

介绍

对象检测是一种计算机视觉技术,用于定位图像或视频中的对象实例。对象检测算法通常利用机器学习或深度学习来产生有一定意义的结果。

虽然有各种各样的架构用于对象检测,本文将使用MX-RCNN(Multi-View X-Ray RCNN),其架构如下图所示:

通过Monk AI轻松进行对象检测(monitor对象存在哪里)

我在本文中选择的任务是从给定图像中检测头盔。为此,我选择了Open Image Dataset 的一个子集,该子集可以公开使用。

1.安装和下载数据

首先让我们安装MONK库

# Clone the Monk Object Detection repo (the '!' prefix is the Jupyter/Colab shell escape)
! git clone https://github.com/Tessellate-Imaging/Monk_Object_Detection.git
# Install the requirements for mxrcnn (one pip install per line of requirements_colab.txt)
! cd Monk_Object_Detection/3_mxrcnn/installation && cat requirements_colab.txt | xargs -n 1 -L 1 pip install

MONK格式遵循以下目录结构


接下来下载数据

# Google Drive blocks direct downloads of large files behind a confirmation page;
# the inner wget fetches the confirm token (saved with the session cookies), and
# the outer wget uses it to perform the real download.
# Download the images 
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1fzhpIo9VTpj9MWlZWZDPOV77BhWGzXcP' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1fzhpIo9VTpj9MWlZWZDPOV77BhWGzXcP" -O data.zip && rm -rf /tmp/cookies.txt
# Download the csv files
! wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1-9mRoeCFsb1dbD0Vj4u2ChGldTGbCoIx' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1-9mRoeCFsb1dbD0Vj4u2ChGldTGbCoIx" -O train_labels.csv && rm -rf /tmp/cookies.txt

现在解压缩图像和csv文件并将其存储在适当的位置

# Rebuild ./dataset from scratch, then move images and labels into the MONK
# layout: dataset/Images/ + dataset/train_labels.csv.
# (The zip apparently extracts to a nested /content/dataset/content/data/Images
# path, hence the mv + cleanup of the leftover 'content' directory.)
! rm -rf dataset
! mkdir dataset
! unzip data.zip -d /content/dataset/
! mv /content/dataset/content/data/Images /content/dataset/
! rm -rf ./dataset/content
! mv train_labels.csv ./dataset/

2.将数据转换为COCO格式

COCO是训练所需的标注格式,下面我们把数据从当前的MONK格式转换为COCO格式。

让我们从导入Python库开始

import os
import numpy as np
import cv2
import dicttoxml
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString
from tqdm import tqdm
import shutil
import json
import pandas as pd
# Dataset layout (MONK format): <root>/<img_dir> holds the images and
# <root>/<anno_file> holds the bounding-box labels, one CSV row per image.
root = "./dataset/"
img_dir = "Images/"
anno_file = "train_labels.csv"

现在,使用以下Python代码将数据转换为COCO格式

# ---------------------------------------------------------------------------
# Convert MONK-format annotations to COCO format.
#
# Input : <root>/<anno_file> — a CSV with two columns:
#           column 0: image file name
#           column 1: space-delimited groups of "x1 y1 x2 y2 label"
# Output: <root>/annotations/instances_<img_dir>.json  (COCO instances JSON)
#         <root>/annotations/classes.txt               (one class name per line)
#
# Fixes vs. the blog listing: curly quotes replaced with ASCII quotes (the
# originals were syntax errors), and the per-image bookkeeping restored to the
# inside of the image loop (the listing's indentation was lost, leaving only
# the last image processed once).
# ---------------------------------------------------------------------------
dataset_path = root
images_folder = root + "/" + img_dir
annotations_path = root + "/annotations/"
if not os.path.isdir(annotations_path):
    os.mkdir(annotations_path)

input_images_folder = images_folder
input_annotations_path = root + "/" + anno_file
output_dataset_path = root
output_image_folder = input_images_folder
output_annotation_folder = annotations_path

tmp = img_dir.replace("/", "")
output_annotation_file = output_annotation_folder + "/instances_" + tmp + ".json"
output_classes_file = output_annotation_folder + "/classes.txt"

df = pd.read_csv(input_annotations_path)
columns = df.columns
delimiter = " "

# First pass: collect the set of class labels across all rows.
anno = []
for i in range(len(df)):
    labels = df[columns[1]][i]
    tmp = labels.split(delimiter)
    for j in range(len(tmp) // 5):
        label = tmp[j * 5 + 4]
        if label not in anno:
            anno.append(label)
anno = sorted(anno)  # sort once at the end (the listing re-sorted every row)

# COCO "categories" section: category id == index of the label in `anno`.
list_dict = []
for i in tqdm(range(len(anno))):
    list_dict.append({"supercategory": "master", "id": i, "name": anno[i]})

# classes.txt is consumed later by set_class_list() at inference time.
with open(output_classes_file, 'w') as anno_f:
    for label in anno:
        anno_f.write(label + "\n")

# Second pass: build the COCO "images" and "annotations" sections.
coco_data = {}
coco_data["type"] = "instances"
coco_data["images"] = []
coco_data["annotations"] = []
coco_data["categories"] = list_dict

image_id = 0
annotation_id = 0
for i in tqdm(range(len(df))):
    img_name = df[columns[0]][i]
    labels = df[columns[1]][i]
    tmp = labels.split(delimiter)

    # Read the image only to record its true height/width in the JSON.
    image_in_path = input_images_folder + "/" + img_name
    img = cv2.imread(image_in_path, 1)
    h, w, c = img.shape

    images_tmp = {}
    images_tmp["file_name"] = img_name
    images_tmp["height"] = h
    images_tmp["width"] = w
    images_tmp["id"] = image_id
    coco_data["images"].append(images_tmp)

    # Each "x1 y1 x2 y2 label" 5-tuple becomes one COCO annotation.
    for j in range(len(tmp) // 5):
        x1 = int(tmp[j * 5 + 0])
        y1 = int(tmp[j * 5 + 1])
        x2 = int(tmp[j * 5 + 2])
        y2 = int(tmp[j * 5 + 3])
        label = tmp[j * 5 + 4]

        annotations_tmp = {}
        annotations_tmp["id"] = annotation_id
        annotation_id += 1
        annotations_tmp["image_id"] = image_id
        annotations_tmp["segmentation"] = []
        annotations_tmp["ignore"] = 0
        annotations_tmp["area"] = (x2 - x1) * (y2 - y1)
        annotations_tmp["iscrowd"] = 0
        # COCO bbox convention: [x, y, width, height].
        annotations_tmp["bbox"] = [x1, y1, x2 - x1, y2 - y1]
        annotations_tmp["category_id"] = anno.index(label)
        coco_data["annotations"].append(annotations_tmp)

    image_id += 1

# Write the COCO instances file.
with open(output_annotation_file, 'w') as outfile:
    outfile.write(json.dumps(coco_data, indent=4))

3.训练模型

再次让我们导入一些基本库

import os
import sys
# Make the Monk mx-rcnn wrapper and the underlying mx-rcnn library importable.
# (The blog listing used curly quotes here, which is a SyntaxError in Python.)
sys.path.append("Monk_Object_Detection/3_mxrcnn/lib/")
sys.path.append("Monk_Object_Detection/3_mxrcnn/lib/mx-rcnn")
from train_base import *
from infer_base import *

# Dataset location for the MONK trainer: <root_dir>/<coco_dir>, with the
# images under the <img_dir> image set.
root_dir = "./"
coco_dir = "dataset"
img_dir = "Images"

现在定义机器学习数据集

# Register the dataset location with the MONK trainer.
set_dataset_params(root_dir=root_dir,coco_dir=coco_dir,imageset=img_dir);

定义网络

# Use a ResNet-50 backbone. (The blog listing's opening curly quote made
# this line a SyntaxError.)
set_model_params(model_name="resnet50")

设置网络模型超参数

# Training hyper-parameters: GPU 0, LR 0.001 decayed after epoch 1,
# 2 epochs, batch size 4. (Curly quotes in the listing fixed to ASCII.)
set_hyper_params(gpus="0", lr=0.001, lr_decay_epoch="1", epochs=2, batch_size=4)
set_output_params(log_interval=100, save_prefix="model_resnet50")
# Per-channel mean/std for image preprocessing. 103.939 (not the listing's
# 103.99) is the standard ImageNet BGR mean and matches the inference-time
# preprocessing used later in this post.
set_img_preproc_params(img_short_side=600, img_long_side=1000,
                       mean=(123.68, 116.779, 103.939), std=(1.0, 1.0, 1.0))

初始化rpn和rcnn参数

# Load default settings for the RPN (region proposal network) and the
# RCNN detection head.
initialize_rpn_params();
initialize_rcnn_params();

开始训练网络模型

roidb = set_dataset();  # build the region-of-interest database from the COCO annotations
sym = set_network();  # assemble the mx-rcnn network symbol
train(sym, roidb);  # run training; checkpoints are written under the configured save_prefix

4.测试模型

# Inference: load the epoch-2 checkpoint and run detection on one image.
# (Curly quotes from the blog listing fixed to ASCII quotes throughout.)
class_file = set_class_list("./dataset/annotations/classes.txt")
# NOTE: the checkpoint name uses a regular hyphen ("-0002"); the listing's
# en dash ("–0002") would never match the file actually saved to disk.
set_model_params(model_name="resnet50",
                 model_path="trained_model/model_resnet50-0002.params")
set_hyper_params(gpus="0", batch_size=1)
set_img_preproc_params(img_short_side=600, img_long_side=1000,
                       mean=(123.68, 116.779, 103.939), std=(1.0, 1.0, 1.0))
initialize_rpn_params()
initialize_rcnn_params()
sym = set_network()
mod = load_model(sym)
# Visualize detections with confidence >= 0.6.
set_output_params(vis_thresh=0.6, vis=True)
Infer("./dataset/Images/797709a61dd7f358.jpg", mod)

让我们看看该模型如何训练

总而言之,Monk AI是一个很棒的库,可以轻松处理计算机视觉任务。

Tags:

最近发表
标签列表