当前位置：首页 > news >正文

深圳高端网站制作网站建设便宜公司

news 2026/5/4 23:34:41

深圳高端网站制作,网站建设便宜公司,html源码网站下载之家,本地论坛 yolo.py的主要功能是构建模型。 1、最主要的函数是parse_model，用于解析yaml文件，并根据解析的结果搭建网络。这个函数的注释如下： def parse_model(d, ch): # model_dict, input_channels(3)解析模型文件，并… yolo.py的主要功能是构建模型。 1、最主要的函数是parse_model，用于解析yaml文件，并根据解析的结果搭建网络。这个函数的注释如下： def parse_model(d, ch): # model_dict, input_channels(3)解析模型文件并搭建网络结构主要实现功能更新当前层args计算c2(当前层的输出channel) 使用当前层的参数搭建当前层生成layerssave:params d: model_dict:params ch: 记录模型每一层的输出channel初始 ch[3],后边会删除:return nn.Sequential(*layers): 网络的每一层的层结构:return sorted(save): 把所有层结构中from不是-1的值记下并排序[4,6,10,14,17,20,23]# Parse a YOLOv5 model.yaml dictionaryLOGGER.info(f\n{:3}{from:18}{n:3}{params:10} {module:40}{arguments:30})# 可以在yaml文件中指定激活函数如何使用待定anchors, nc, gd, gw, act d[anchors], d[nc], d[depth_multiple], d[width_multiple], d.get(activation)if act:Conv.default_act eval(act) # redefine default activation, i.e. Conv.default_act nn.SiLU()LOGGER.info(f{colorstr(activation:)} {act}) # print# 三个检测头的参数3*(205) 75(VOC)na (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 3no na * (nc 5) # number of outputs anchors * (classes 5)############################## 开始搭建网络 #################################### layers保存每一层的层结构# save 记录下所有层结构中from不是-1的结构序号# c2保存当前层输出的channellayers, save, c2 [], [], ch[-1] # layers, savelist, ch out# from(当前输入来自哪些层) number(当前的层数) module(当前层类别) args(当前层参数)for i, (f, n, m, args) in enumerate(d[backbone] d[head]): # from, number, module, args# 得到当前层的真实类名 m Focus - class models.common.Focusm eval(m) if isinstance(m, str) else m # eval stringsfor j, a in enumerate(args):# 处理eval函数中出现变量未定义的情况(NameError)# 如果a是一个字符串类型str则将其作为表达式进行求值得到结果。如果a不是字符串类型则直接使用a的值。# 把结果赋值给args[j]以此实现动态地根据字符串表达式来更新args参数列表with contextlib.suppress(NameError):args[j] eval(a) if isinstance(a, str) else a # eval strings# n当前层数 gd depth_multiple n n_ max(round(n * gd), 1) if n 1 else n # depth gainif m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, 
CrossConv,BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:# c1当前层的输出channel c2当前层的输出channel ch 记录所有层的输出channelc1, c2 ch[f], args[0]# 确保c2*gw能够被8整除如果不能返回一个能够被8整除的最接近于c2*gw的数# 如果不最后一层的output就控制宽度最后一层的channel必须是noif c2 ! no: # if not outputc2 make_divisible(c2 * gw, 8)# 在初始args上更新加入当前层的输入channelargs [c1, c2, *args[1:]] # [in_channel,out_channel,*args[1:]]if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:# 在第二个位置上插入bottleneck的个数nargs.insert(2, n) # number of repeats n 1 # 恢复默认值1elif m is nn.BatchNorm2d:# 返回上一层的输出channelargs [ch[f]]elif m is Concat:# 把f中的输出累加到这层的channelc2 sum(ch[x] for x in f)# TODO: channel, gw, gdelif m in {Detect, Segment}:# 在args中加入三个Detect层的输出channelargs.append([ch[x] for x in f])if isinstance(args[1], int): # number of anchors # 几乎不执行args[1] [list(range(args[1] * 2))] * len(f)if m is Segment:args[3] make_divisible(args[3] * gw, 8)elif m is Contract: # 几乎不使用c2 ch[f] * args[0] ** 2elif m is Expand: # 几乎不使用c2 ch[f] // args[0] ** 2else:# Unsample args不变c2 ch[f]# 调用m(类)根据参数args创建当前层的module并赋值给m_创建数量为nm_ nn.Sequential(*(m(*args) for _ in range(n))) if n 1 else m(*args) # module# 打印一些基本信息t str(m)[8:-2].replace(__main__., ) # module typenp sum(x.numel() for x in m_.parameters()) # number paramsm_.i, m_.f, m_.type, m_.np i, f, t, np # attach index, from index, type, number paramsLOGGER.info(f{i:3}{str(f):18}{n_:3}{np:10.0f} {t:40}{str(args):30}) # print# 把所有层结构中from不是-1的值记下[6,4,14,10,17,20,23]save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x ! 
-1) # append to savelist# 把当前层结构module加入到layers中layers.append(m_)if i 0:ch [] # 去除输入channel# 把当前层输出的channel加入chch.append(c2)return nn.Sequential(*layers), sorted(save)parse_model在DetectionModel的__init__函数中调用。 # 调用parse_modelself.model, self.save parse_model(deepcopy(self.yaml), ch[ch]) # model, savelistself.names [str(i) for i in range(self.yaml[nc])] # default namesself.inplace self.yaml.get(inplace, True) 2、Detect类用于构建最后的detect层在parse_model函数中调用 # 构建Detect层把feature map通过一个卷积操作和公式计算到需要的shape为后边计算loss和NMS做准备。 class Detect(nn.Module):# YOLOv5 Detect head for detection modelsstride None # strides computed during builddynamic False # force grid reconstructionexport False # export modedef __init__(self, nc80, anchors(), ch(), inplaceTrue): # detection layerdetection layer相当于yolov3的YOLOLayer层:params nc: number of class:params anchors:传入3个feature map上的所有anchor的大小(P3,P4,P5):params ch:[128,256,512] 3个输出feanture map的chaannelsuper().__init__()self.nc nc # number of classesself.no nc 5 # number of outputs per anchorself.nl len(anchors) # number of detection layersself.na len(anchors[0]) // 2 # number of anchors 每个feature map的anchor个数self.grid [torch.empty(0) for _ in range(self.nl)] # init grid self.anchor_grid [torch.empty(0) for _ in range(self.nl)] # init anchor grid # 模型中需要保存的参数有两种一种是需要使用optimizer更新的一种是不需要被更新的称为buffer# buffer的参数更新在forward而optim.step只能更新nn.parameter类型的参数# anchor.shape shape(nl,na,2)self.register_buffer(anchors, torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)# 对每个输出feature map都调用一次conv1*1self.m nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv# 默认为True默认不使用AWS Inferentia加速self.inplace inplace # use inplace ops (e.g. 
slice assignment)def forward(self, x)::return train:一个tensor list存放三个元素[bs, anchor_num, grid_w, grid_h, xywhclasses]以VOC为例,[1, 3, 80, 80, 25] [1, 3, 40, 40, 25] [1, 3, 20, 20, 25]inference:0 preds [1, 1920048001200, 25] [bs, anchor_num*grid_w*grid_h, xywhclasses]1 train_out :一个tensor list存放三个元素[bs, anchor_num, grid_w, grid_h, xywhclasses][1, 3, 80, 80, 25] [1, 3, 40, 40, 25] [1, 3, 20, 20, 25]z [] # inference outputfor i in range(self.nl): # 对3个feature map分别进行处理x[i] self.m[i](x[i]) # convbs, _, ny, nx x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)# [bs, 75, 80, 80] to [bs, 3, 25, 80, 80] to [bs, 3, 80, 80, 25]x[i] x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()if not self.training: # inference# inference 返回的不是归一化后的网格的偏移量需要加上网格的位置得到最终的预测坐标再送入NMS# 构建网络就是为了记录每个grid的网格坐标方便后边使用# dynamic默认为falseif self.dynamic or self.grid[i].shape[2:4] ! x[i].shape[2:4]:self.grid[i], self.anchor_grid[i] self._make_grid(nx, ny, i)if isinstance(self, Segment): # (boxes masks)xy, wh, conf, mask x[i].split((2, 2, self.nc 1, self.no - self.nc - 5), 4)xy (xy.sigmoid() * 2 self.grid[i]) * self.stride[i] # xywh (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # why torch.cat((xy, wh, conf.sigmoid(), mask), 4)else: # Detect (boxes only)xy, wh, conf x[i].sigmoid().split((2, 2, self.nc 1), 4)xy (xy * 2 self.grid[i]) * self.stride[i] # xywh (wh * 2) ** 2 * self.anchor_grid[i] # why torch.cat((xy, wh, conf), 4)# z是一个teensor list三个元素分别是[1,19200,25] [1, 4800, 25], [1, 1200, 25]z.append(y.view(bs, self.na * nx * ny, self.no))return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x)def _make_grid(self, nx20, ny20, i0, torch_1_10check_version(torch.__version__, 1.10.0)):生成网格和锚框的张量网格形状由ny和nx居多锚框形状由self.anchors[i]和self.stride[i]决定:params nx: 网格宽度:params ny: 网格高度 :params i: 锚框索引:params torch_1_10:判断torch版本是否大于1.10.0:return grid::return anchor_grid:d self.anchors[i].device # 锚框的devicet self.anchors[i].dtype # 锚框的数据类型# self.na 每个feature 
map的anchor的个数shape 1, self.na, ny, nx, 2 # grid shape# y是长度为ny的张量x是长度为nx的张量y和x分别表示网格的纵坐标和横坐标y, x torch.arange(ny, deviced, dtypet), torch.arange(nx, deviced, dtypet)# 如果torch_1_10为True表示torch版本大于等于1.10.0代码使用torch.meshgrid函数生成网格坐标采用ij索引方式。# 否则代码使用torch.meshgrid函数生成网格坐标采用默认的索引方式yv, xv torch.meshgrid(y, x, indexingij) if torch_1_10 else torch.meshgrid(y, x) # torch0.7 compatibility# [ny, nx, 2] to [1, self.na, ny, nx, 2] - 0.5 # 以此得到网格的偏移量grid torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y 2.0 * x - 0.5anchor_grid (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)return grid, anchor_grid 3、DetectionModel类继承自BaseModel类用于构建模型使用时定义了一个全局变量model指向这个类在其他文件中使用时直接调用model。 class BaseModel(nn.Module):# YOLOv5 base modeldef forward(self, x, profileFalse, visualizeFalse):return self._forward_once(x, profile, visualize) # single-scale inference, traindef _forward_once(self, x, profileFalse, visualizeFalse)::params x: 输入图像:params profile: True 可以做一些性能评估:params visualize: True 可以做一些特征可视化# y: 存放着self.save True 的每一层的输出因为后边层结构concat要用到# dt在profile中做性能评估时使用y, dt [], [] # outputs# 前向推理每一层结构 # m.i index m.f from m.type 类名 m.mp number of parms for m in self.model:# 4个concat操作和1个detect操作if m.f ! 
-1: # if not from previous layer# concat: m.f[-1,6] x就有两个元素一个是上一层的输出另一个是index层的输出x y[m.f] if isinstance(m.f, int) else [x if j -1 else y[j] for j in m.f] # from earlier layers# 打印日志信息FLOPS、timeif profile:self._profile_one_layer(m, x, dt)x m(x) # run# 存放着self.save的每一层的输出y.append(x if m.i in self.save else None) # save outputif visualize:feature_visualization(x, m.type, m.i, save_dirvisualize)return xdef _profile_one_layer(self, m, x, dt):c m self.model[-1] # is final layer, copy input as inplace fixo thop.profile(m, inputs(x.copy() if c else x, ), verboseFalse)[0] / 1E9 * 2 if thop else 0 # FLOPst time_sync()for _ in range(10):m(x.copy() if c else x)dt.append((time_sync() - t) * 100)if m self.model[0]:LOGGER.info(f{time (ms):10s} {GFLOPs:10s} {params:10s} module)LOGGER.info(f{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type})if c:LOGGER.info(f{sum(dt):10.2f} {-:10s} {-:10s} Total)def fuse(self): # fuse model Conv2d() BatchNorm2d() layers用在detect.py和val.py中fuse model conv2d() batch normLOGGER.info(Fusing layers... 
)for m in self.model.modules():if isinstance(m, (Conv, DWConv)) and hasattr(m, bn):m.conv fuse_conv_and_bn(m.conv, m.bn) # update convdelattr(m, bn) # remove batchnormm.forward m.forward_fuse # update forwardself.info()return selfdef info(self, verboseFalse, img_size640): # print model informationmodel_info(self, verbose, img_size)def _apply(self, fn):# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffersself super()._apply(fn)m self.model[-1] # Detect()if isinstance(m, (Detect, Segment)):m.stride fn(m.stride)m.grid list(map(fn, m.grid))if isinstance(m.anchor_grid, list):m.anchor_grid list(map(fn, m.anchor_grid))return selfclass DetectionModel(BaseModel):# YOLOv5 detection modeldef __init__(self, cfgyolov5s.yaml, ch3, ncNone, anchorsNone): # model, input channels, number of classesparams cfg: 配置文件params ch: input channelparam nc: number of classesparams anchor: 一般是nonesuper().__init__()if isinstance(cfg, dict):self.yaml cfg # model dictelse: # is *.yamlimport yaml # for torch hubself.yaml_file Path(cfg).namewith open(cfg, encodingascii, errorsignore) as f:self.yaml yaml.safe_load(f) # model dict# Define modelch self.yaml[ch] self.yaml.get(ch, ch) # input channelsif nc and nc ! 
self.yaml[nc]:LOGGER.info(fOverriding model.yaml nc{self.yaml[nc]} with nc{nc})self.yaml[nc] nc # override yaml valueif anchors:LOGGER.info(fOverriding model.yaml anchors with anchors{anchors})self.yaml[anchors] round(anchors) # override yaml value# 调用parse_model 创建网络模型# self.model 初始化的整个网络结构# self.save 所有层结构中from不等于-1的序号self.model, self.save parse_model(deepcopy(self.yaml), ch[ch]) # model, savelist# default class names [0, 1, 2, 3, 4.......]self.names [str(i) for i in range(self.yaml[nc])] # default namesself.inplace self.yaml.get(inplace, True)# Build strides, anchors# 获取Detect 模块的stride(相对于输入图像的下采样率)和anchors在当前Detect输出的feature map的尺度m self.model[-1] # Detect()if isinstance(m, (Detect, Segment)):s 256 # 2x min stridem.inplace self.inplaceforward lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)# 计算三个feature map的下采样率m.stride torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward# 检查anchor顺序是否与stride顺序是否一致check_anchor_order(m)# 求出相对于当前feature map的anchor大小 [10,13]/8[1.25,1.625]m.anchors / m.stride.view(-1, 1, 1)self.stride m.strideself._initialize_biases() # only run once# Init weights, biasesinitialize_weights(self) # 初始化模型权重self.info() LOGGER.info()def forward(self, x, augmentFalse, profileFalse, visualizeFalse):# 是否在测试时使用Test Time Augmentation(TTA)if augment:return self._forward_augment(x) # augmented inference, None# 默认执行前向推理# single-scale inference, train # _forward_once在BaseModel中实现return self._forward_once(x, profile, visualize) # single-scale inference, traindef _forward_augment(self, x):Test Time Augmentation(TTA) img_size x.shape[-2:] # height, widths [1, 0.83, 0.67] # scalesf [None, 3, None] # flips (2-ud上下, 3-lr左右)y [] # outputsfor si, fi in zip(s, f):# scale_img缩放图片尺寸xi scale_img(x.flip(fi) if fi else x, si, gsint(self.stride.max()))yi self._forward_once(xi)[0] # forward# cv2.imwrite(fimg_{si}.jpg, 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save# descaleyi self._descale_pred(yi, 
fi, si, img_size)y.append(yi)y self._clip_augmented(y) # clip augmented tailsreturn torch.cat(y, 1), None # augmented inference, traindef _descale_pred(self, p, flips, scale, img_size):# de-scale predictions following augmented inference (inverse operation)if self.inplace:p[..., :4] / scale # de-scaleif flips 2:p[..., 1] img_size[0] - p[..., 1] # de-flip udelif flips 3:p[..., 0] img_size[1] - p[..., 0] # de-flip lrelse:x, y, wh p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scaleif flips 2:y img_size[0] - y # de-flip udelif flips 3:x img_size[1] - x # de-flip lrp torch.cat((x, y, wh, p[..., 4:]), -1)return pdef _clip_augmented(self, y):# Clip YOLOv5 augmented inference tailsnl self.model[-1].nl # number of detection layers (P3-P5)g sum(4 ** x for x in range(nl)) # grid pointse 1 # exclude layer counti (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indicesy[0] y[0][:, :-i] # largei (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indicesy[-1] y[-1][:, i:] # smallreturn ydef _initialize_biases(self, cfNone): # initialize biases into Detect(), cf is class frequency# https://arxiv.org/abs/1708.02002 section 3.3# cf torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlengthnc) 1.m self.model[-1] # Detect() modulefor mi, s in zip(m.m, m.stride): # fromb mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)b.data[:, 4] math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)b.data[:, 5:5 m.nc] math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # clsmi.bias torch.nn.Parameter(b.view(-1), requires_gradTrue) 使用语句 Model DetectionModel # retain YOLOv5 Model class for backwards compatibility

查看全文

http://www.hkea.cn/news/14534295/