From ff362fd735d9702a1cd094ffbdca6ecd3a4a5205 Mon Sep 17 00:00:00 2001 From: <> Date: Tue, 27 Aug 2024 09:01:53 +0000 Subject: [PATCH] Deployed 935ccba with MkDocs version: 1.4.2 --- .nojekyll | 0 404.html | 1052 +++ assets/_mkdocstrings.css | 119 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.220ee61c.min.js | 29 + assets/javascripts/bundle.220ee61c.min.js.map | 8 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.74e28a9f.min.js | 42 + .../workers/search.74e28a9f.min.js.map | 8 + assets/stylesheets/main.eebd395e.min.css | 1 + assets/stylesheets/main.eebd395e.min.css.map | 1 + assets/stylesheets/palette.ecc896b0.min.css | 1 + .../stylesheets/palette.ecc896b0.min.css.map | 1 + en/how_to_guides/callback/index.html | 1173 +++ en/how_to_guides/data_preparation/index.html | 1230 +++ en/how_to_guides/write_a_new_model/index.html | 1552 ++++ en/index.html | 1379 ++++ en/installation/index.html | 1295 ++++ en/modelzoo/benchmark/index.html | 1668 ++++ en/modelzoo/yolov3/index.html | 1483 ++++ en/modelzoo/yolov4/index.html | 1526 ++++ en/modelzoo/yolov5/index.html | 1516 ++++ en/modelzoo/yolov7/index.html | 1486 ++++ en/modelzoo/yolov8/index.html | 1578 ++++ en/modelzoo/yolox/index.html | 1526 ++++ en/notes/changelog/index.html | 1140 +++ en/notes/code_of_conduct/index.html | 1140 +++ en/notes/contributing/index.html | 1387 ++++ en/notes/faq/index.html | 1123 +++ en/reference/data/index.html | 4265 +++++++++++ en/reference/models/index.html | 1709 +++++ en/tutorials/configuration/index.html | 1730 +++++ en/tutorials/data_augmentation/index.html | 1375 ++++ en/tutorials/deployment/index.html | 2068 +++++ en/tutorials/finetune/index.html | 1381 ++++ en/tutorials/modelarts/index.html | 1288 ++++ en/tutorials/quick_start/index.html | 1323 ++++ how_to_guides/callback/index.html | 1173 +++ how_to_guides/data_preparation/index.html | 1230 +++ how_to_guides/write_a_new_model/index.html | 1552 ++++ index.html | 1379 ++++ installation/index.html | 
1295 ++++ modelzoo/benchmark/index.html | 1668 ++++ modelzoo/yolov3/index.html | 1483 ++++ modelzoo/yolov4/index.html | 1526 ++++ modelzoo/yolov5/index.html | 1516 ++++ modelzoo/yolov7/index.html | 1486 ++++ modelzoo/yolov8/index.html | 1578 ++++ modelzoo/yolox/index.html | 1526 ++++ notes/changelog/index.html | 1140 +++ notes/code_of_conduct/index.html | 1140 +++ notes/contributing/index.html | 1387 ++++ notes/faq/index.html | 1123 +++ objects.inv | 7 + reference/data/index.html | 4265 +++++++++++ reference/models/index.html | 1709 +++++ search/search_index.json | 1 + sitemap.xml | 195 + sitemap.xml.gz | Bin 0 -> 696 bytes tutorials/configuration/index.html | 1730 +++++ tutorials/data_augmentation/index.html | 1375 ++++ tutorials/deployment/index.html | 2068 +++++ tutorials/finetune/index.html | 1381 ++++ tutorials/modelarts/index.html | 1288 ++++ tutorials/quick_start/index.html | 1323 ++++ zh/how_to_guides/callback/index.html | 1174 +++ zh/how_to_guides/data_preparation/index.html | 1230 +++ zh/how_to_guides/write_a_new_model/index.html | 1553 ++++ zh/index.html | 1378 ++++ zh/installation/index.html | 1295 ++++ zh/modelzoo/benchmark/index.html | 1668 ++++ zh/modelzoo/yolov3/index.html | 1483 ++++ zh/modelzoo/yolov4/index.html | 1518 ++++ zh/modelzoo/yolov5/index.html | 1516 ++++ zh/modelzoo/yolov7/index.html | 1486 ++++ zh/modelzoo/yolov8/index.html | 1590 ++++ zh/modelzoo/yolox/index.html | 1526 ++++ zh/notes/changelog/index.html | 1140 +++ zh/notes/code_of_conduct/index.html | 1140 +++ zh/notes/contributing/index.html | 1393 ++++ zh/notes/faq/index.html | 1123 +++ zh/reference/data/index.html | 4265 +++++++++++ zh/reference/models/index.html | 1709 +++++ zh/tutorials/configuration/index.html | 1730 +++++ zh/tutorials/data_augmentation/index.html | 1375 ++++ zh/tutorials/deployment/index.html | 2069 +++++ zh/tutorials/finetune/index.html | 1381 ++++ zh/tutorials/modelarts/index.html | 1286 ++++ zh/tutorials/quick_start/index.html | 1322 ++++ 120 files changed, 120696 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.220ee61c.min.js create mode 100644 assets/javascripts/bundle.220ee61c.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.74e28a9f.min.js create mode 100644 assets/javascripts/workers/search.74e28a9f.min.js.map create mode 100644 assets/stylesheets/main.eebd395e.min.css create mode 100644 assets/stylesheets/main.eebd395e.min.css.map create mode 100644 assets/stylesheets/palette.ecc896b0.min.css create mode 100644 assets/stylesheets/palette.ecc896b0.min.css.map create mode 100644 en/how_to_guides/callback/index.html create mode 100644 en/how_to_guides/data_preparation/index.html create mode 100644 en/how_to_guides/write_a_new_model/index.html create mode 100644 en/index.html create mode 100644 en/installation/index.html create mode 100644 en/modelzoo/benchmark/index.html create mode 100644 en/modelzoo/yolov3/index.html create mode 100644 en/modelzoo/yolov4/index.html create mode 100644 en/modelzoo/yolov5/index.html create mode 100644 en/modelzoo/yolov7/index.html create mode 100644 en/modelzoo/yolov8/index.html create mode 100644 en/modelzoo/yolox/index.html create mode 100644 en/notes/changelog/index.html create mode 100644 en/notes/code_of_conduct/index.html create mode 100644 en/notes/contributing/index.html create mode 100644 en/notes/faq/index.html create mode 100644 en/reference/data/index.html create mode 100644 en/reference/models/index.html create mode 100644 en/tutorials/configuration/index.html create mode 100644 en/tutorials/data_augmentation/index.html create mode 100644 en/tutorials/deployment/index.html create mode 100644 en/tutorials/finetune/index.html create mode 100644 en/tutorials/modelarts/index.html create mode 100644 en/tutorials/quick_start/index.html create mode 100644 how_to_guides/callback/index.html create mode 100644 how_to_guides/data_preparation/index.html create mode 100644 how_to_guides/write_a_new_model/index.html create mode 100644 index.html create mode 100644 installation/index.html create mode 100644 modelzoo/benchmark/index.html create mode 100644 modelzoo/yolov3/index.html create mode 100644 modelzoo/yolov4/index.html create mode 100644 modelzoo/yolov5/index.html create mode 100644 modelzoo/yolov7/index.html create mode 100644 modelzoo/yolov8/index.html create mode 100644 modelzoo/yolox/index.html create mode 100644 notes/changelog/index.html create mode 100644 notes/code_of_conduct/index.html create mode 100644 notes/contributing/index.html create mode 100644 notes/faq/index.html create mode 100644 objects.inv create mode 100644 reference/data/index.html create mode 100644 reference/models/index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 tutorials/configuration/index.html create mode 100644 tutorials/data_augmentation/index.html create mode 100644 tutorials/deployment/index.html create mode 100644 
tutorials/finetune/index.html create mode 100644 tutorials/modelarts/index.html create mode 100644 tutorials/quick_start/index.html create mode 100644 zh/how_to_guides/callback/index.html create mode 100644 zh/how_to_guides/data_preparation/index.html create mode 100644 zh/how_to_guides/write_a_new_model/index.html create mode 100644 zh/index.html create mode 100644 zh/installation/index.html create mode 100644 zh/modelzoo/benchmark/index.html create mode 100644 zh/modelzoo/yolov3/index.html create mode 100644 zh/modelzoo/yolov4/index.html create mode 100644 zh/modelzoo/yolov5/index.html create mode 100644 zh/modelzoo/yolov7/index.html create mode 100644 zh/modelzoo/yolov8/index.html create mode 100644 zh/modelzoo/yolox/index.html create mode 100644 zh/notes/changelog/index.html create mode 100644 zh/notes/code_of_conduct/index.html create mode 100644 zh/notes/contributing/index.html create mode 100644 zh/notes/faq/index.html create mode 100644 zh/reference/data/index.html create mode 100644 zh/reference/models/index.html create mode 100644 zh/tutorials/configuration/index.html create mode 100644 zh/tutorials/data_augmentation/index.html create mode 100644 zh/tutorials/deployment/index.html create mode 100644 zh/tutorials/finetune/index.html create mode 100644 zh/tutorials/modelarts/index.html create mode 100644 zh/tutorials/quick_start/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..f647cc3c --- /dev/null +++ b/404.html @@ -0,0 +1,1052 @@ + + + + + + + + + + + + + + + + + + MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
404 - Not found

+ + + + + + + + + \ No newline at end of file diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css new file mode 100644 index 00000000..85449ec7 --- /dev/null +++ b/assets/_mkdocstrings.css @@ -0,0 +1,119 @@ + +/* Avoid breaking parameter names, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* No line break before first paragraph of descriptions. */ +.doc-md-description, +.doc-md-description>p:first-child { + display: inline; +} + +/* Max width for docstring sections tables. */ +.doc .md-typeset__table, +.doc .md-typeset__table table { + display: table !important; + width: 100%; +} + +.doc .md-typeset__table tr { + display: table-row; +} + +/* Defaults in Spacy table style. */ +.doc-param-default { + float: right; +} + +/* Backward-compatibility: docstring section titles in bold. */ +.doc-section-title { + font-weight: bold; +} + +/* Symbols in Navigation and ToC. */ +:root, +[data-md-color-scheme="default"] { + --doc-symbol-attribute-fg-color: #953800; + --doc-symbol-function-fg-color: #8250df; + --doc-symbol-method-fg-color: #8250df; + --doc-symbol-class-fg-color: #0550ae; + --doc-symbol-module-fg-color: #5cad0f; + + --doc-symbol-attribute-bg-color: #9538001a; + --doc-symbol-function-bg-color: #8250df1a; + --doc-symbol-method-bg-color: #8250df1a; + --doc-symbol-class-bg-color: #0550ae1a; + --doc-symbol-module-bg-color: #5cad0f1a; +} + +[data-md-color-scheme="slate"] { + --doc-symbol-attribute-fg-color: #ffa657; + --doc-symbol-function-fg-color: #d2a8ff; + --doc-symbol-method-fg-color: #d2a8ff; + --doc-symbol-class-fg-color: #79c0ff; + --doc-symbol-module-fg-color: #baff79; + + --doc-symbol-attribute-bg-color: #ffa6571a; + --doc-symbol-function-bg-color: #d2a8ff1a; + --doc-symbol-method-bg-color: #d2a8ff1a; + --doc-symbol-class-bg-color: #79c0ff1a; + --doc-symbol-module-bg-color: #baff791a; +} + +code.doc-symbol { + border-radius: .1rem; + font-size: .85em; + padding: 0 .3em; + font-weight: bold; +} + +code.doc-symbol-attribute { + color: var(--doc-symbol-attribute-fg-color); + background-color: var(--doc-symbol-attribute-bg-color); +} + +code.doc-symbol-attribute::after { + content: "attr"; +} + +code.doc-symbol-function { + color: var(--doc-symbol-function-fg-color); + background-color: var(--doc-symbol-function-bg-color); +} + +code.doc-symbol-function::after { + content: "func"; +} + +code.doc-symbol-method { + color: var(--doc-symbol-method-fg-color); + background-color: var(--doc-symbol-method-bg-color); +} + +code.doc-symbol-method::after { + content: "meth"; +} + +code.doc-symbol-class { + color: var(--doc-symbol-class-fg-color); + background-color: var(--doc-symbol-class-bg-color); +} + +code.doc-symbol-class::after { + content: "class"; +} + +code.doc-symbol-module { + color: var(--doc-symbol-module-fg-color); + background-color: var(--doc-symbol-module-bg-color); +} + +code.doc-symbol-module::after { + content: "mod"; +} + +.doc-signature .autorefs { + color: inherit; + border-bottom: 1px dotted currentcolor; +} diff --git a/assets/images/favicon.png b/assets/images/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..1cf13b9f9d978896599290a74f77d5dbe7d1655c GIT binary patch literal 1870 zcmV-U2eJ5xP)Gc)JR9QMau)O=X#!i9;T z37kk-upj^(fsR36MHs_+1RCI)NNu9}lD0S{B^g8PN?Ww(5|~L#Ng*g{WsqleV}|#l zz8@ri&cTzw_h33bHI+12+kK6WN$h#n5cD8OQt`5kw6p~9H3()bUQ8OS4Q4HTQ=1Ol z_JAocz`fLbT2^{`8n~UAo=#AUOf=SOq4pYkt;XbC&f#7lb$*7=$na!mWCQ`dBQsO0 

Usage of MindYOLO callback function

+

Callback function: when the program reaches a certain mount point at runtime, all methods registered to that mount point are called automatically. Callback functions increase the flexibility and extensibility of a program, because users can register custom methods to a mount point and have them called without modifying the program's own code.

+

In MindYOLO, the callback function is specifically implemented in the mindyolo/utils/callback.py file. +

#mindyolo/utils/callback.py
+@CALLBACK_REGISTRY.registry_module()
+class callback_class_name(BaseCallback):
+    def __init__(self, **kwargs):
+        super().__init__()
+    ...
+    def callback_fn_name(self, run_context: RunContext):
+        pass
+

+

Add a list of dictionaries under the callback field of the model's yaml file to enable the calls:

#Callback function configuration dictionary:
+callback:
+- { name: callback_class_name, args: xx }
+- { name: callback_class_name2, args: xx }
+
+Take YOLOX as an example:

+

Add logic to the on_train_step_begin method of the YoloxSwitchTrain class in the mindyolo/utils/callback.py file to print a "train step begin" log:

@CALLBACK_REGISTRY.registry_module()
+class YoloxSwitchTrain(BaseCallback):
+
+    def on_train_step_begin(self, run_context: RunContext):
+        # Custom logic
+        logger.info("train step begin")
+        pass
+
+Add the callback function under the callback field of the corresponding YOLOX yaml file, configs/yolox/hyp.scratch.yaml:
callback:
+  - { name: YoloxSwitchTrain, switch_epoch_num: 285 }
+
+The logger.info("train step begin") statement will then be executed before each training step.

+

With the help of the callback function, users can customize the logic that needs to be executed at a certain mount point without having to understand the code of the complete training process.
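As a reference, below is a minimal sketch of such a custom callback. It reuses only the pieces shown above (CALLBACK_REGISTRY, BaseCallback, RunContext, the on_train_step_begin mount point and the yaml callback field); the class name StepLogCallback, its prefix argument and the logged message are hypothetical.

# a hypothetical custom callback, written in the same style as the examples above
@CALLBACK_REGISTRY.registry_module()
class StepLogCallback(BaseCallback):
    def __init__(self, prefix="custom", **kwargs):
        super().__init__()
        self.prefix = prefix

    def on_train_step_begin(self, run_context: RunContext):
        # executed automatically at the "train step begin" mount point;
        # logger is the same logging object used by YoloxSwitchTrain above
        logger.info(f"[{self.prefix}] train step begin")

# registered in the model's yaml file, e.g.:
# callback:
#   - { name: StepLogCallback, prefix: my_exp }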

+ + + + + + + + + \ No newline at end of file diff --git a/en/how_to_guides/data_preparation/index.html b/en/how_to_guides/data_preparation/index.html new file mode 100644 index 00000000..782c4ffd --- /dev/null +++ b/en/how_to_guides/data_preparation/index.html @@ -0,0 +1,1230 @@ + + + + + + + + + + + + + + + + + + + + + + + + Data Preparation - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Data preparation

+

Dataset format introduction

+

Download the coco2017 YOLO-format labels coco2017labels-segments and the coco2017 original images train2017 and val2017, then put the original images into the images directory of the coco2017 YOLO-format dataset:

└─ coco2017_yolo
+    ├─ annotations
+        └─ instances_val2017.json
+    ├─ images
+        ├─ train2017   # coco2017 original images
+        └─ val2017     # coco2017 original images
+    ├─ labels
+        ├─ train2017
+        └─ val2017
+    ├─ train2017.txt
+    ├─ val2017.txt
+    └─ test-dev2017.txt
+
+Each line of the train2017.txt file is the relative path of a single image, for example:
./images/train2017/00000000.jpg
+./images/train2017/00000001.jpg
+./images/train2017/00000002.jpg
+./images/train2017/00000003.jpg
+./images/train2017/00000004.jpg
+./images/train2017/00000005.jpg
+
+The txt files in the labels/train2017 folder contain the annotation information of the corresponding images; both detect and segment formats are supported.

+

Detect format: each row usually has 5 columns, corresponding to the category id followed by the normalized center coordinates (x, y) and the normalized width and height (w, h) of the annotation box:

62 0.417040 0.206280 0.403600 0.412560
+62 0.818810 0.197933 0.174740 0.189680
+39 0.684540 0.277773 0.086240 0.358960
+0 0.620220 0.725853 0.751680 0.525840
+63 0.197190 0.364053 0.394380 0.669653
+39 0.932330 0.226240 0.034820 0.076640
+
+Segment format: the first value in each line is the category id, followed by pairs of normalized coordinate points (x, y):

+

45 0.782016 0.986521 0.937078 0.874167 0.957297 0.782021 0.950562 0.739333 0.825844 0.561792 0.714609 0.420229 0.657297 0.391021 0.608422 0.4 0.0303438 0.750562 0.0016875 0.811229 0.003375 0.889896 0.0320156 0.986521
+45 0.557859 0.143813 0.487078 0.0314583 0.859547 0.00897917 0.985953 0.130333 0.984266 0.184271 0.930344 0.386521 0.80225 0.480896 0.763484 0.485396 0.684266 0.39775 0.670781 0.3955 0.679219 0.310104 0.642141 0.253937 0.561234 0.155063 0.559547 0.137083
+50 0.39 0.727063 0.418234 0.649417 0.455297 0.614125 0.476469 0.614125 0.51 0.590583 0.54 0.569417 0.575297 0.562354 0.601766 0.56 0.607062 0.536479 0.614125 0.522354 0.637063 0.501167 0.665297 0.48 0.69 0.477646 0.698828 0.494125 0.698828 0.534125 0.712938 0.529417 0.742938 0.548229 0.760594 0.564708 0.774703 0.550583 0.778234 0.536479 0.781766 0.531771 0.792359 0.541167 0.802937 0.555292 0.802937 0.569417 0.802937 0.576479 0.822359 0.576479 0.822359 0.597646 0.811766 0.607062 0.811766 0.618833 0.818828 0.637646 0.820594 0.656479 0.827641 0.687063 0.827641 0.703521 0.829406 0.727063 0.838234 0.708229 0.852359 0.729417 0.868234 0.750583 0.871766 0.792938 0.877063 0.821167 0.884125 0.861167 0.817062 0.92 0.734125 0.976479 0.711172 0.988229 0.48 0.988229 0.494125 0.967063 0.517062 0.912937 0.508234 0.832937 0.485297 0.788229 0.471172 0.774125 0.395297 0.729417
+45 0.375219 0.0678333 0.375219 0.0590833 0.386828 0.0503542 0.424156 0.0315208 0.440797 0.0281458 0.464 0.0389167 0.525531 0.115583 0.611797 0.222521 0.676359 0.306583 0.678875 0.317354 0.677359 0.385271 0.66475 0.394687 0.588594 0.407458 0.417094 0.517771 0.280906 0.604521 0.0806562 0.722208 0.0256719 0.763917 0.00296875 0.809646 0 0.786104 0 0.745083 0 0.612583 0.03525 0.613271 0.0877187 0.626708 0.130594 0.626708 0.170437 0.6025 0.273844 0.548708 0.338906 0.507 0.509906 0.4115 0.604734 0.359042 0.596156 0.338188 0.595141 0.306583 0.595141 0.291792 0.579516 0.213104 0.516969 0.129042 0.498297 0.100792 0.466516 0.0987708 0.448875 0.0786042 0.405484 0.0705208 0.375219 0.0678333 0.28675 0.108375 0.282719 0.123167 0.267078 0.162854 0.266062 0.189083 0.245391 0.199833 0.203516 0.251625 0.187375 0.269771 0.159641 0.240188 0.101125 0.249604 0 0.287271 0 0.250271 0 0.245563 0.0975938 0.202521 0.203516 0.145354 0.251953 0.123167 0.28675 0.108375
+49 0.587812 0.128229 0.612281 0.0965625 0.663391 0.0840833 0.690031 0.0908125 0.700109 0.10425 0.705859 0.133042 0.700109 0.143604 0.686422 0.146479 0.664828 0.153188 0.644672 0.157042 0.629563 0.175271 0.605797 0.181021 0.595 0.147437
+49 0.7405 0.178417 0.733719 0.173896 0.727781 0.162583 0.729484 0.150167 0.738812 0.124146 0.747281 0.0981458 0.776109 0.0811875 0.804094 0.0845833 0.814266 0.102667 0.818516 0.115104 0.812578 0.133208 0.782906 0.151292 0.754063 0.172771
+49 0.602656 0.178854 0.636125 0.167875 0.655172 0.165125 0.6665 0.162375 0.680391 0.155521 0.691719 0.153458 0.703047 0.154146 0.713859 0.162375 0.724156 0.174729 0.730844 0.193271 0.733422 0.217979 0.733938 0.244063 0.733422 0.281813 0.732391 0.295542 0.728266 0.300354 0.702016 0.294854 0.682969 0.28525 0.672156 0.270146
+49 0.716891 0.0519583 0.683766 0.0103958 0.611688 0.0051875 0.568828 0.116875 0.590266 0.15325 0.590266 0.116875 0.613641 0.0857083 0.631172 0.0857083 0.6565 0.083125 0.679875 0.0883125 0.691563 0.0961042 0.711031 0.0649375
+
+instances_val2017.json is the validation set annotation in COCO format, which can be used directly with the COCO API for mAP calculation.

+

During training & inference, you need to set train_set, val_set and test_set in configs/coco.yaml to the actual data paths.
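For reference, the relevant fields look roughly like the sketch below; the exact field layout and surrounding keys may differ, so check the actual configs/coco.yaml in your repository:

data:
  dataset_name: coco
  train_set: ./coco/train2017.txt
  val_set: ./coco/val2017.txt
  test_set: ./coco/test-dev2017.txt
  nc: 80
  # names: [ 'person', 'bicycle', ... ]  # the class-name list follows in the real file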

+

For a working example of fine-tuning on a custom dataset with the MindYOLO toolkit, please refer to Finetune.

diff --git a/en/how_to_guides/write_a_new_model/index.html b/en/how_to_guides/write_a_new_model/index.html
new file mode 100644

Write A New Model - MindYOLO Docs

Model Writing Guide

+

This document provides a tutorial for writing custom models for MindYOLO.
+It is divided into three parts:

+
  • Model definition: define the network directly in Python, or define it with a yaml file.
  • Register the model (optional): after registration, the custom model can be created by its name through the create_model interface.
  • Verification: verify that the model is runnable.
+

Model definition

+

1. Use python code directly to write the network

+

Module import

+

Import the nn module and ops module in the MindSpore framework to define the components and operations of the neural network. +

import mindspore.nn as nn
+import mindspore.ops.operations as ops
+

+

Create a model

+

Define a model class MyModel that inherits from nn.Cell. In the constructor __init__, define the various components of the model:

+
class MyModel(nn.Cell):
+    def __init__(self):
+        super(MyModel, self).__init__()
+        # conv1 is a 2D convolution layer: 3 input channels, 16 output channels, 3x3 kernel, stride 1, padding 1.
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, pad_mode="pad", padding=1)
+        # relu is a ReLU activation operation.
+        self.relu = ops.ReLU()
+        # maxpool is a 2D max-pooling layer with a 2x2 window and stride 2.
+        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
+        # conv2 is another 2D convolution layer: 16 input channels, 32 output channels, 3x3 kernel, stride 1, padding 1.
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, pad_mode="pad", padding=1)
+        # fc is a fully connected layer with input feature dimension 32*8*8 and output dimension 10.
+        self.fc = nn.Dense(32 * 8 * 8, 10)
+
+    # In the construct method, the forward pass is defined: the input x goes through convolution, activation and pooling, is flattened into a 1-D vector per sample, and finally passes through the fully connected layer to produce the output.
+    def construct(self, x): 
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = x.view(x.shape[0], -1)
+        x = self.fc(x)
+        return x
+
+

Create a model instance

+

By instantiating the MyModel class, create a model instance model, which can later be used for model training and inference.

model = MyModel()
+

+

2. Use yaml file to write network

+

This usually requires the following three steps:

+
  • Create a new mymodel.yaml file.
  • Create a corresponding mymodel.py file.
  • Import the model in the mindyolo/models/__init__.py file.
+

Here is a detailed guide to writing the mymodel.yaml file:
+Taking a simple network as an example, write the necessary parameters in yaml format and then use these parameters in the mymodel.py file. The network section describes the model architecture as
+[[from, number, module, args], ...]: each element represents the configuration of one network layer.
+

# The yaml in __BASE__ indicates the base configuration file for inheritance. Repeated parameters will be overwritten by the current file;
+__BASE__:
+  - '../coco.yaml'
+  - './hyp.scratch-high.yaml'
+
+per_batch_size: 32
+img_size: 640
+sync_bn: False
+
+network:
+  model_name: mymodel
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [ 8, 16, 32 ]
+
+  # Configuration of the backbone network. The meaning of each layer is
+  # [from, number, module, args]
+  # Take the first layer as an example, [-1, 1, ConvNormAct, [32, 3, 1]]: the input comes from `-1` (the previous layer), the number of repetitions is 1, the module name is ConvNormAct, and the module arguments are [32, 3, 1];
+  backbone:
+    [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]], # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]], # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+    ]
+
+  # head part configuration
+  head:
+    [
+     [-1, 1, ConvNormAct, [512, 3, 2]], # 7-P4/16
+     [-1, 8, Bottleneck, [512]],
+     [-1, 1, ConvNormAct, [1024, 3, 2]], # 9-P5/32
+     [-1, 4, Bottleneck, [1024]],        # 10
+    ]
+
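For intuition, depth_multiple and width_multiple scale the per-layer repeat count and output channels roughly as in the common YOLO-style rule sketched below; this is only an illustration, and the exact rounding used by build_model_from_cfg may differ:

import math

def scale_layer(number: int, channels: int, depth_multiple: float, width_multiple: float):
    # Repeat count is scaled by depth_multiple (at least one repeat is kept).
    scaled_number = max(round(number * depth_multiple), 1) if number > 1 else number
    # Output channels are scaled by width_multiple and rounded up to a multiple of 8.
    scaled_channels = math.ceil(channels * width_multiple / 8) * 8
    return scaled_number, scaled_channels

# e.g. with depth_multiple=0.33, width_multiple=0.5 (YOLOv5s-like values):
print(scale_layer(8, 256, 0.33, 0.5))  # -> (3, 128)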

+

Write mymodel.py file:

+

Module import

+

It is necessary to import modules in the package. For example, from .registry import register_model, etc.

+
import numpy as np
+
+import mindspore as ms
+from mindspore import Tensor, nn
+
+from .initializer import initialize_defult #Used to initialize the default parameters of the model, including weight initialization method, BN layer parameters, etc.
+from .model_factory import build_model_from_cfg #Used to build a target detection model according to the parameters in the YAML configuration file and return an instance of the model.
+from .registry import register_model #Used to register a custom model in Mindyolo for use in the YAML configuration file.
+
+#Visibility declaration
+__all__ = ["MYmodel", "mymodel"]
+
+

Create a configuration dictionary

+

The _cfg function is an auxiliary function used to create a configuration dictionary. It accepts a url parameter and other keyword parameters and returns a dictionary containing the url and other parameters.
+default_cfgs is a dictionary used to store default configurations. Here, mymodel is used as the key to create a configuration dictionary using the _cfg function. +

def _cfg(url="", **kwargs):
+    return {"url": url, **kwargs}
+
+default_cfgs = {"mymodel": _cfg(url="")}
+

+

Create a model

+

In MindSpore, the model class inherits from nn.Cell. Generally, the following two methods need to be overridden:

+
    +
  • In the __init__ method, define the module layers needed in the model.
  • In the construct method, define the model's forward logic.
  • +
+
class MYmodel(nn.Cell):
+
+    def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False):
+        super(MYmodel, self).__init__()
+        self.cfg = cfg
+        self.stride = Tensor(np.array(cfg.stride), ms.int32)
+        self.stride_max = int(max(self.cfg.stride))
+        ch, nc = in_channels, num_classes
+
+        self.nc = nc  # override yaml value
+        self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn)
+        self.names = [str(i) for i in range(nc)]  # default names
+
+        initialize_defult()  # Optional. You may need initialize_defult to get the same initialization for conv2d/dense layers as PyTorch;
+
+    def construct(self, x):
+        return self.model(x)
+
+

Register model (optional)

+

If you need to use the mindyolo interface to initialize a custom model, you need to first register and import the model

+

Model registration
+

@register_model #The registered model can be accessed by the create_model interface as a model name;
+def mymodel(cfg, in_channels=3, num_classes=None, **kwargs) -> MYmodel:
+    """Get mymodel model.
+    Refer to the class `MYmodel` for more details."""
+    model = MYmodel(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+Model import

+
#Add the following code to the mindyolo/models/__init__.py file
+
+from . import mymodel #mymodel.py files are usually placed in the mindyolo/models/directory
+__all__.extend(mymodel.__all__)
+from .mymodel import *
+
+

Verify main

+

The initial writing phase should ensure that the model is runnable. Basic verification can be performed with the following code block: first import the required modules and functions, then parse the configuration object.

+

if __name__ == "__main__":
+    from mindyolo.models.model_factory import create_model
+    from mindyolo.utils.config import parse_config
+
+    opt = parse_config()
+
+Create a model and specify the related parameters. Note: if you want to create a custom model by name with create_model, you need to register it with the @register_model decorator first; please refer to the Register model (optional) section above.
    model = create_model(
+        model_name="mymodel",
+        model_cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    )
+

+

Otherwise, please import and instantiate the model directly:

+

    from mindyolo.models.mymodel import MYmodel
+    model = MYmodel(
+        model_name="mymodel",
+        model_cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    ) 
+
+Finally, create an input tensor x and pass it to the model for forward computation. +
    x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)
+    out = model(x)
+    out = out[0] if isinstance(out, (list, tuple)) else out
+    print(f"Output shape is {[o.shape for o in out]}")
+

diff --git a/en/index.html b/en/index.html
new file mode 100644

Home - MindYOLO Docs

MindYOLO

+

+ + docs + + + GitHub + + + PRs Welcome + +

+ +

MindYOLO implements state-of-the-art YOLO series algorithms based on MindSpore.
The following are the corresponding mindyolo versions and the supported mindspore versions.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| mindyolo | mindspore |
|----------|-----------|
| master | master |
| 0.4 | 2.3.0 |
| 0.3 | 2.2.10 |
| 0.2 | 2.0 |
| 0.1 | 1.8 |
+

+

Benchmark and Model Zoo

+

See Benchmark Results.

+

supported model list

+ +

Installation

+

See INSTALLATION for details.

+

Getting Started

+

See QUICK START for details.

+

Notes

+

⚠️ The current version is based on GRAPH mode with static shapes. Dynamic shape support will be added in a later release.

+

How to Contribute

+

We appreciate all contributions including issues and PRs to make MindYOLO better.

+

Please refer to CONTRIBUTING for the contributing guideline.

+

License

+

MindYOLO is released under the Apache License 2.0.

+

Acknowledgement

+

MindYOLO is an open-source project that welcomes any contribution and feedback. We hope the toolbox and benchmark can serve the growing research community by providing a flexible and standardized toolkit to reimplement existing methods and develop new real-time object detection methods.

+

Citation

+

If you find this project useful in your research, please consider citing:

+
@misc{MindSpore Object Detection YOLO 2023,
+    title={{MindSpore Object Detection YOLO}:MindSpore Object Detection YOLO Toolbox and Benchmark},
+    author={MindSpore YOLO Contributors},
+    howpublished = {\url{https://github.com/mindspore-lab/mindyolo}},
+    year={2023}
+}
+
diff --git a/en/installation/index.html b/en/installation/index.html
new file mode 100644

Installation - MindYOLO Docs

Installation

+

Dependency

+
  • mindspore >= 2.3
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • openmpi 4.0.3 (for distributed mode)

To install the dependency, please run

+
pip install -r requirements.txt
+
+

MindSpore can be easily installed by following the official instructions, where you can select the best fit for your hardware platform. To run in distributed mode, openmpi needs to be installed.

+

⚠️ The current version only supports the Ascend platform, and the GPU platform will be supported later.

+

Install with PyPI

+

MindYOLO is published as a Python package and can be installed with pip, ideally by using a virtual environment. Open up a terminal and install MindYOLO with:

+
pip install mindyolo
+
+

Install from Source (Bleeding Edge Version)

+

from VCS

+
pip install git+https://github.com/mindspore-lab/mindyolo.git
+
+

from local src

+

As this project is in active development, if you are a developer or contributor, please prefer this installation!

+

MindYOLO can be directly used from GitHub by cloning the repository into a local folder which might be useful if you want to use the very latest version:

+
git clone https://github.com/mindspore-lab/mindyolo.git
+
+

After cloning from git, it is recommended that you install using "editable" mode, which can help resolve potential module import issues:

+
cd mindyolo
+pip install -e .
+
+

In addition, we provide an optional fast coco api to improve eval speed. The code is provided in C++, and you can try compiling it with the following commands (this step is optional):

+
cd mindyolo/csrc
+sh build.sh
+
+

We also provide fused GPU operators built upon the MindSpore ops.Custom API. The fused GPU operators can improve training speed. The source code is provided in C++ and CUDA and is located in the folder examples/custom_gpu_op/. To enable this feature in the GPU training process, modify the method bbox_iou in mindyolo/models/losses/iou_loss.py by referring to the demo script examples/custom_gpu_op/iou_loss_fused.py. Before running iou_loss_fused.py, compile the C++ and CUDA source code into dynamic link libraries with the following commands (this step is optional):

+
bash examples/custom_gpu_op/fused_op/build.sh
+
diff --git a/en/modelzoo/benchmark/index.html b/en/modelzoo/benchmark/index.html
new file mode 100644

Benchmark - MindYOLO Docs

Benchmark

+

Detection

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv8 | N | 16 * 8 | 640 | MS COCO 2017 | 37.2 | 3.2M | yaml | weights |
| YOLOv8 | S | 16 * 8 | 640 | MS COCO 2017 | 44.6 | 11.2M | yaml | weights |
| YOLOv8 | M | 16 * 8 | 640 | MS COCO 2017 | 50.5 | 25.9M | yaml | weights |
| YOLOv8 | L | 16 * 8 | 640 | MS COCO 2017 | 52.8 | 43.7M | yaml | weights |
| YOLOv8 | X | 16 * 8 | 640 | MS COCO 2017 | 53.7 | 68.2M | yaml | weights |
| YOLOv7 | Tiny | 16 * 8 | 640 | MS COCO 2017 | 37.5 | 6.2M | yaml | weights |
| YOLOv7 | L | 16 * 8 | 640 | MS COCO 2017 | 50.8 | 36.9M | yaml | weights |
| YOLOv7 | X | 12 * 8 | 640 | MS COCO 2017 | 52.4 | 71.3M | yaml | weights |
| YOLOv5 | N | 32 * 8 | 640 | MS COCO 2017 | 27.3 | 1.9M | yaml | weights |
| YOLOv5 | S | 32 * 8 | 640 | MS COCO 2017 | 37.6 | 7.2M | yaml | weights |
| YOLOv5 | M | 32 * 8 | 640 | MS COCO 2017 | 44.9 | 21.2M | yaml | weights |
| YOLOv5 | L | 32 * 8 | 640 | MS COCO 2017 | 48.5 | 46.5M | yaml | weights |
| YOLOv5 | X | 16 * 8 | 640 | MS COCO 2017 | 50.5 | 86.7M | yaml | weights |
| YOLOv4 | CSPDarknet53 | 16 * 8 | 608 | MS COCO 2017 | 45.4 | 27.6M | yaml | weights |
| YOLOv4 | CSPDarknet53(silu) | 16 * 8 | 608 | MS COCO 2017 | 45.8 | 27.6M | yaml | weights |
| YOLOv3 | Darknet53 | 16 * 8 | 640 | MS COCO 2017 | 45.5 | 61.9M | yaml | weights |
| YOLOX | N | 8 * 8 | 416 | MS COCO 2017 | 24.1 | 0.9M | yaml | weights |
| YOLOX | Tiny | 8 * 8 | 416 | MS COCO 2017 | 33.3 | 5.1M | yaml | weights |
| YOLOX | S | 8 * 8 | 640 | MS COCO 2017 | 40.7 | 9.0M | yaml | weights |
| YOLOX | M | 8 * 8 | 640 | MS COCO 2017 | 46.7 | 25.3M | yaml | weights |
| YOLOX | L | 8 * 8 | 640 | MS COCO 2017 | 49.2 | 54.2M | yaml | weights |
| YOLOX | X | 8 * 8 | 640 | MS COCO 2017 | 51.6 | 99.1M | yaml | weights |
| YOLOX | Darknet53 | 8 * 8 | 640 | MS COCO 2017 | 47.7 | 63.7M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv8 | N | 16 * 8 | 640 | MS COCO 2017 | 37.3 | 373.55 | 3.2M | yaml | weights |
| YOLOv8 | S | 16 * 8 | 640 | MS COCO 2017 | 44.7 | 365.53 | 11.2M | yaml | weights |
| YOLOv7 | Tiny | 16 * 8 | 640 | MS COCO 2017 | 37.5 | 496.21 | 6.2M | yaml | weights |
| YOLOv5 | N | 32 * 8 | 640 | MS COCO 2017 | 27.4 | 736.08 | 1.9M | yaml | weights |
| YOLOv5 | S | 32 * 8 | 640 | MS COCO 2017 | 37.6 | 787.34 | 7.2M | yaml | weights |
| YOLOv4 | CSPDarknet53 | 16 * 8 | 608 | MS COCO 2017 | 46.1 | 337.25 | 27.6M | yaml | weights |
| YOLOv3 | Darknet53 | 16 * 8 | 640 | MS COCO 2017 | 46.6 | 396.60 | 61.9M | yaml | weights |
| YOLOX | S | 8 * 8 | 640 | MS COCO 2017 | 41.0 | 242.15 | 9.0M | yaml | weights |
+
+

Segmentation

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Mask mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------------|--------|--------|----------|
| YOLOv8-seg | X | 16 * 8 | 640 | MS COCO 2017 | 52.5 | 42.9 | 71.8M | yaml | weights |
+
+

Deploy inference

+ +

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
diff --git a/en/modelzoo/yolov3/index.html b/en/modelzoo/yolov3/index.html
new file mode 100644

YOLOv3 - MindYOLO Docs

YOLOv3

+
+

YOLOv3: An Incremental Improvement

+
+

Abstract

+

We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv3 | Darknet53 | 16 * 8 | 640 | MS COCO 2017 | 45.5 | 61.9M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv3 | Darknet53 | 16 * 8 | 640 | MS COCO 2017 | 46.6 | 396.60 | 61.9M | yaml | weights |
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We referred to a commonly used third-party YOLOv3 implementation.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Pretraining Model

+

You can get the pre-training model from here.

+

To convert it to a ckpt file loadable by mindyolo, put it in the root directory and then run:

python mindyolo/utils/convert_weight_darknet53.py
+

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.
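For example, a quick sketch of the linear scaling rule (the base values below are placeholders, not the recipe's actual settings):

# Linear learning-rate scaling when changing the global batch size.
base_lr = 0.01           # placeholder: learning rate tuned for the original recipe
base_global_bs = 16 * 8  # original per-device batch size x number of devices
new_global_bs = 16 * 4   # e.g. training on 4 devices instead of 8

new_lr = base_lr * new_global_bs / base_global_bs
print(new_lr)  # 0.005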

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. YOLOv3 release v9.1. https://github.com/ultralytics/yolov3/releases/tag/v9.1, 2021. +[2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018.

diff --git a/en/modelzoo/yolov4/index.html b/en/modelzoo/yolov4/index.html
new file mode 100644

YOLOv4 - MindYOLO Docs

YOLOv4

+
+

YOLOv4: Optimal Speed and Accuracy of Object Detection

+
+

Abstract

+

There are a huge number of features which are said to +improve Convolutional Neural Network (CNN) accuracy. +Practical testing of combinations of such features on large +datasets, and theoretical justification of the result, is required. Some features operate on certain models exclusively +and for certain problems exclusively, or only for small-scale +datasets; while some features, such as batch-normalization +and residual-connections, are applicable to the majority of +models, tasks, and datasets. We assume that such universal +features include Weighted-Residual-Connections (WRC), +Cross-Stage-Partial-connections (CSP), Cross mini-Batch +Normalization (CmBN), Self-adversarial-training (SAT) +and Mish-activation. We use new features: WRC, CSP, +CmBN, SAT, Mish activation, Mosaic data augmentation, +CmBN, DropBlock regularization, and CIoU loss, and combine some of them to achieve state-of-the-art results: 43.5% +AP (65.7% AP50) for the MS COCO dataset at a realtime speed of 65 FPS on Tesla V100.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv4 | CSPDarknet53 | 16 * 8 | 608 | MS COCO 2017 | 45.4 | 27.6M | yaml | weights |
| YOLOv4 | CSPDarknet53(silu) | 16 * 8 | 608 | MS COCO 2017 | 45.8 | 27.6M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv4 | CSPDarknet53 | 16 * 8 | 608 | MS COCO 2017 | 46.1 | 337.25 | 27.6M | yaml | weights |
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Pretraining Model

+

You can get the pre-training model trained on ImageNet2012 from here.

+

To convert it to a ckpt file loadable by mindyolo, put it in the root directory and then run:

python mindyolo/utils/convert_weight_cspdarknet53.py
+

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Notes

+
    +
  • As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.
  • +
  • If the following warning occurs, setting the environment variable PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' will fix it. +
    multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache))
    +
  • +
+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020.

diff --git a/en/modelzoo/yolov5/index.html b/en/modelzoo/yolov5/index.html
new file mode 100644

YOLOv5 - MindYOLO Docs

YOLOv5

+

Abstract

+

YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, representing Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv5 | N | 32 * 8 | 640 | MS COCO 2017 | 27.3 | 1.9M | yaml | weights |
| YOLOv5 | S | 32 * 8 | 640 | MS COCO 2017 | 37.6 | 7.2M | yaml | weights |
| YOLOv5 | M | 32 * 8 | 640 | MS COCO 2017 | 44.9 | 21.2M | yaml | weights |
| YOLOv5 | L | 32 * 8 | 640 | MS COCO 2017 | 48.5 | 46.5M | yaml | weights |
| YOLOv5 | X | 16 * 8 | 640 | MS COCO 2017 | 50.5 | 86.7M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv5 | N | 32 * 8 | 640 | MS COCO 2017 | 27.4 | 736.08 | 1.9M | yaml | weights |
| YOLOv5 | S | 32 * 8 | 640 | MS COCO 2017 | 37.6 | 787.34 | 7.2M | yaml | weights |
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV5 to reproduce the P5 series model, and the differences are as follows: We use 8x NPU(Ascend910) for training, and the single-NPU batch size is 32. This is different from the official code.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. YOLOv5 release v6.1. https://github.com/ultralytics/yolov5/releases/tag/v6.1, 2022.

diff --git a/en/modelzoo/yolov7/index.html b/en/modelzoo/yolov7/index.html
new file mode 100644

YOLOv7 - MindYOLO Docs

YOLOv7

+
+

YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors

+
+

Abstract

+

YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9% AP) by 509% in speed and 2% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2% AP) by 551% in speed and 0.7% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv7 | Tiny | 16 * 8 | 640 | MS COCO 2017 | 37.5 | 6.2M | yaml | weights |
| YOLOv7 | L | 16 * 8 | 640 | MS COCO 2017 | 50.8 | 36.9M | yaml | weights |
| YOLOv7 | X | 12 * 8 | 640 | MS COCO 2017 | 52.4 | 71.3M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv7 | Tiny | 16 * 8 | 640 | MS COCO 2017 | 37.5 | 496.21 | 6.2M | yaml | weights |
+
+


+

Notes

+
    +
  • Context: Training context denoted as {device}x{pieces}-{MS mode}, where mindspore mode can be G - graph mode or F - pynative mode with ms function. For example, D910x8-G is for training on 8 pieces of Ascend 910 NPU using graph mode.
  • +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV7 to reproduce the P5 series model, and the differences are as follows: We use 8x NPU(Ascend910) for training, and the single-NPU batch size for tiny/l/x is 16/16/12. This is different from the official code.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022.

diff --git a/en/modelzoo/yolov8/index.html b/en/modelzoo/yolov8/index.html
new file mode 100644

YOLOv8 - MindYOLO Docs

YOLOv8

+

Abstract

+

Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation and image classification tasks.

+
+ +
+ +

Results

+

Detection

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOv8 | N | 16 * 8 | 640 | MS COCO 2017 | 37.2 | 3.2M | yaml | weights |
| YOLOv8 | S | 16 * 8 | 640 | MS COCO 2017 | 44.6 | 11.2M | yaml | weights |
| YOLOv8 | M | 16 * 8 | 640 | MS COCO 2017 | 50.5 | 25.9M | yaml | weights |
| YOLOv8 | L | 16 * 8 | 640 | MS COCO 2017 | 52.8 | 43.7M | yaml | weights |
| YOLOv8 | X | 16 * 8 | 640 | MS COCO 2017 | 53.7 | 68.2M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOv8 | N | 16 * 8 | 640 | MS COCO 2017 | 37.3 | 373.55 | 3.2M | yaml | weights |
| YOLOv8 | S | 16 * 8 | 640 | MS COCO 2017 | 44.7 | 365.53 | 11.2M | yaml | weights |
+
+

Segmentation

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Mask mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------------|--------|--------|----------|
| YOLOv8-seg | X | 16 * 8 | 640 | MS COCO 2017 | 52.5 | 42.9 | 71.8M | yaml | weights |
+
+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV8 to reproduce the P5 series model.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. Ultralytics YOLOv8. https://github.com/ultralytics/ultralytics, 2023.

diff --git a/en/modelzoo/yolox/index.html b/en/modelzoo/yolox/index.html
new file mode 100644

YOLOx - MindYOLO Docs

YOLOX

+

Abstract

+

YOLOX is a new high-performance detector with some experienced improvements to YOLO series. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|--------|--------|----------|
| YOLOX | N | 8 * 8 | 416 | MS COCO 2017 | 24.1 | 0.9M | yaml | weights |
| YOLOX | Tiny | 8 * 8 | 416 | MS COCO 2017 | 33.3 | 5.1M | yaml | weights |
| YOLOX | S | 8 * 8 | 640 | MS COCO 2017 | 40.7 | 9.0M | yaml | weights |
| YOLOX | M | 8 * 8 | 640 | MS COCO 2017 | 46.7 | 25.3M | yaml | weights |
| YOLOX | L | 8 * 8 | 640 | MS COCO 2017 | 49.2 | 54.2M | yaml | weights |
| YOLOX | X | 8 * 8 | 640 | MS COCO 2017 | 51.6 | 99.1M | yaml | weights |
| YOLOX | Darknet53 | 8 * 8 | 640 | MS COCO 2017 | 47.7 | 63.7M | yaml | weights |
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | BatchSize | ImageSize | Dataset | Box mAP (%) | ms/step | Params | Recipe | Download |
|------|-------|-----------|-----------|---------|-------------|---------|--------|--------|----------|
| YOLOX | S | 8 * 8 | 640 | MS COCO 2017 | 41.0 | 242.15 | 9.0M | yaml | weights |
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOX to reproduce the results.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please first run:

+
# standalone 1st stage training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. https://arxiv.org/abs/2107.08430, 2021.

diff --git a/en/notes/changelog/index.html b/en/notes/changelog/index.html
new file mode 100644

Change Log - MindYOLO Docs

Change Log

+

Coming soon.

diff --git a/en/notes/code_of_conduct/index.html b/en/notes/code_of_conduct/index.html
new file mode 100644

Code of Conduct - MindYOLO Docs

Code of Conduct

+

Coming soon.

diff --git a/en/notes/contributing/index.html b/en/notes/contributing/index.html
new file mode 100644

Contributing - MindYOLO Docs

MindYOLO contributing guidelines

+

Contributor License Agreement

+

It's required to sign CLA before your first code submission to MindYOLO community.

+

For individual contributor, please refer to ICLA online document for the detailed information.

+

Getting Started

+ +

Contribution Workflow

+

Code style

+

Please follow this style to make MindYOLO easy to review, maintain and develop.

+
    +
  • +

    Coding guidelines

    +

    The Python coding style suggested by Python PEP 8 Coding Style and C++ coding style suggested by Google C++ Coding Guidelines are used in MindYOLO community. The CppLint, CppCheck, CMakeLint, CodeSpell, Lizard, ShellCheck and PyLint are used to check the format of codes, installing these plugins in your IDE is recommended.

    +
  • +
  • +

    Unittest guidelines

    +

    The Python unittest style suggested by pytest and the C++ unittest style suggested by Googletest Primer are used in the MindYOLO community. The design intent of a testcase should be reflected by its name or comment.

    +
  • +
  • +

    Refactoring guidelines

    +

    We encourage developers to refactor our code to eliminate the code smell. All codes should conform to needs to the coding style and testing style, and refactoring codes are no exception. Lizard threshold for nloc (lines of code without comments) is 100 and for cnc (cyclomatic complexity number) is 20, when you receive a Lizard warning, you have to refactor the code you want to merge.

    +
  • +
  • +

    Document guidelines

    +

    We use MarkdownLint to check the format of markdown documents. MindYOLO CI modifies the following rules based on the default configuration.

    +
      +
    • MD007 (unordered list indentation): The indent parameter is set to 4, indicating that all content in the unordered list needs to be indented using four spaces.
    • +
    • MD009 (spaces at the line end): The br_spaces parameter is set to 2, indicating that there can be 0 or 2 spaces at the end of a line.
    • +
    • MD029 (sequence numbers of an ordered list): The style parameter is set to ordered, indicating that the sequence numbers of the ordered list are in ascending order.
    • +
    +

    For details, please refer to RULES.

    +
  • +
+

Fork-Pull development model

+
    +
  • +

    Fork MindYOLO repository

    +

    Before submitting code to MindYOLO project, please make sure that this project have been forked to your own repository. It means that there will be parallel development between MindYOLO repository and your own repository, so be careful to avoid the inconsistency between them.

    +
  • +
  • +

    Clone the remote repository

    +

    If you want to download the code to the local machine, git is the best way:

    +
    # For GitHub
    +git clone https://github.com/{insert_your_forked_repo}/mindyolo.git
    +git remote add upstream https://github.com/mindspore-lab/mindyolo.git
    +
    +
  • +
  • +

    Develop code locally

    +

    To avoid inconsistency between multiple branches, checking out to a new branch is SUGGESTED:

    +
    git checkout -b {new_branch_name} origin/master
    +
    +

    Taking the master branch as an example, MindYOLO may create version branches and downstream development branches as needed, please fix bugs upstream first. +Then you can change the code arbitrarily.

    +
  • +
  • +

    Push the code to the remote repository

    +

    After updating the code, you should push the update in the formal way:

    +
    git add .
    +git status # Check the update status
    +git commit -m "Your commit title"
    +git commit -s --amend #Add the concrete description of your commit
    +git push origin {new_branch_name}
    +
    +
  • +
  • +

    Pull a request to MindYOLO repository

    +

    In the last step, you need to open a pull request comparing your new branch with the MindYOLO master branch. After the pull request is created, the Jenkins CI will be automatically set up for build tests. Your pull request should be merged into the upstream master branch as soon as possible to reduce the risk of merge conflicts.

    +
  • +
+

Report issues

+

A great way to contribute to the project is to send a detailed report when you encounter an issue. We always appreciate a well-written, thorough bug report, and will thank you for it!

+

When reporting issues, refer to this format:

+
    +
  • What version of env (MindSpore, os, python, MindYOLO etc) are you using?
  • +
  • Is this a BUG REPORT or FEATURE REQUEST?
  • +
  • What kind of issue it is; add labels to highlight it on the issue dashboard.
  • +
  • What happened?
  • +
  • What you expected to happen?
  • +
  • How to reproduce it?(as minimally and precisely as possible)
  • +
  • Special notes for your reviewers?
  • +
+

Issues advisory:

+
    +
  • If you find an unclosed issue, which is exactly what you are going to solve, please put some comments on that issue to tell others you would be in charge of it.
  • +
  • If an issue is opened for a while, it's recommended for contributors to precheck before working on solving that issue.
  • +
  • If you resolve an issue which is reported by yourself, it's also required to let others know before closing that issue.
  • +
  • If you want the issue to be responded as quickly as possible, please try to label it, you can find kinds of labels on Label List
  • +
+

Propose PRs

+
    +
  • Raise your idea as an issue on GitHub
  • +
  • If it is a new feature that needs lots of design details, a design proposal should also be submitted.
  • +
  • After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR.
  • +
  • No PR may be merged until it receives 2+ LGTM from approvers. Please NOTE that an approver is NOT allowed to add LGTM to their own PR.
  • +
  • After PR is sufficiently discussed, it will get merged, abandoned or rejected depending on the outcome of the discussion.
  • +
+

PRs advisory:

+
    +
  • Any irrelevant changes should be avoided.
  • +
  • Make sure your commit history is well ordered.
  • +
  • Always keep your branch up to date with the master branch.
  • +
  • For bug-fix PRs, make sure all related issues are linked.
  • +
diff --git a/en/notes/faq/index.html b/en/notes/faq/index.html
new file mode 100644

FAQ - MindYOLO Docs
diff --git a/en/reference/data/index.html b/en/reference/data/index.html
new file mode 100644

Data - MindYOLO Docs

Data

+

Data Loader

+ + +
+ + +

+ mindyolo.data.loader.create_loader(dataset, batch_collate_fn, column_names_getitem, column_names_collate, batch_size, epoch_size=1, rank=0, rank_size=1, num_parallel_workers=8, shuffle=True, drop_remainder=False, python_multiprocessing=False) + +

+ + +
+ +

Creates dataloader.

+

Applies operations such as transform and batch to the ms.dataset.Dataset object +created by the create_dataset function to get the dataloader.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Parameters:
  • dataset (COCODataset): dataset object created by create_dataset.
  • batch_size (int or function): the number of rows each batch is created with; an int or a callable object which takes exactly 1 parameter, BatchInfo.
  • drop_remainder (bool, default False): determines whether to drop the last block whose data row number is less than batch size. If True, and if there are fewer than batch_size rows available to make the last batch, those rows will be dropped and not propagated to the child node.
  • num_parallel_workers (int, default 8): number of workers (threads) to process the dataset in parallel.
  • python_multiprocessing (bool, default False): parallelize Python operations with multiple worker processes; this option could be beneficial if the Python operation is computationally heavy.

Returns:
  • BatchDataset, dataset batched.

+
+
+ +
+ Source code in mindyolo/data/loader.py +
def create_loader(
+    dataset,
+    batch_collate_fn,
+    column_names_getitem,
+    column_names_collate,
+    batch_size,
+    epoch_size=1,
+    rank=0,
+    rank_size=1,
+    num_parallel_workers=8,
+    shuffle=True,
+    drop_remainder=False,
+    python_multiprocessing=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (COCODataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+    de.config.set_seed(1236517205 + rank)
+    cores = multiprocessing.cpu_count()
+    num_parallel_workers = min(int(cores / rank_size), num_parallel_workers)
+    logger.info(f"Dataloader num parallel workers: [{num_parallel_workers}]")
+    if rank_size > 1:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(8, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+            num_shards=rank_size,
+            shard_id=rank,
+        )
+    else:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(32, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+        )
+    ds = ds.batch(
+        batch_size, per_batch_map=batch_collate_fn,
+        input_columns=column_names_getitem, output_columns=column_names_collate, drop_remainder=drop_remainder
+    )
+    ds = ds.repeat(epoch_size)
+
+    return ds
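As the source above shows, when rank_size > 1 the generator dataset is sharded across devices via num_shards/shard_id, the dataset seed is offset by rank, and the CPU workers are divided by rank_size. Below is a minimal sketch of building a per-device loader in a distributed launch; it assumes the MindSpore communication context is already configured, reuses the dataset from the previous example, and, as before, the dataset.train_collate_fn name is an assumption rather than something documented on this page.

```python
from mindspore.communication import init, get_rank, get_group_size

from mindyolo.data.loader import create_loader

init()  # assumes the distributed environment (e.g. msrun / OpenMPI) is already set up
rank, rank_size = get_rank(), get_group_size()

# Each device builds its own loader; create_loader shards the samples with
# num_shards=rank_size / shard_id=rank and seeds the pipeline per rank.
loader = create_loader(
    dataset=dataset,                                   # COCODataset from the previous example
    batch_collate_fn=dataset.train_collate_fn,         # assumed collate helper
    column_names_getitem=dataset.column_names_getitem,
    column_names_collate=dataset.column_names_collate,
    batch_size=16,                                     # per-device batch size
    rank=rank,
    rank_size=rank_size,
    shuffle=True,
    drop_remainder=True,
)
```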

Dataset


mindyolo.data.dataset.COCODataset


Load the COCO dataset (COCO labels in YOLO format).

Parameters:

- dataset_path (str, default ''): dataset label directory (or list file) for the dataset, for example ./coco/train2017.txt with the layout below:

      COCO_ROOT
      ├── train2017.txt
      ├── annotations
      │     └── instances_train2017.json
      ├── images
      │     └── train2017
      │             ├── 000000000001.jpg
      │             └── 000000000002.jpg
      └── labels
            └── train2017
                    ├── 000000000001.txt
                    └── 000000000002.txt

- transforms (list): a list of image data augmentations, applied to the dataset samples in order (passed to the constructor as transforms_dict; see the sketch below).
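The constructor takes the augmentation recipe through its transforms_dict argument: each entry names one of the class methods via func_name, an optional prob controls how often it is applied, and the remaining keys are forwarded as keyword arguments (copy_paste and letterbox are special-cased in __getitem__, shown in the source below). Here is a hedged sketch of what a training-time list might look like; the probabilities and hyper-parameters are illustrative, not recommended values.

```python
# Illustrative COCODataset.transforms_dict for training. __getitem__ dispatches each entry as
#   sample = getattr(dataset, func_name)(sample, **kwargs), applied with probability `prob`.
train_transforms = [
    # mosaic builds a 4-image (or, with mosaic9_prob, 9-image) collage; its post_transform
    # entries run on the collaged sample.
    {"func_name": "mosaic", "prob": 1.0, "mosaic9_prob": 0.2,
     "post_transform": [
         {"func_name": "resample_segments"},   # polygon lists -> ndarray, required by random_perspective
         {"func_name": "random_perspective", "degrees": 0.0, "translate": 0.1, "scale": 0.5, "shear": 0.0},
     ]},
    {"func_name": "hsv_augment", "prob": 1.0, "hgain": 0.015, "sgain": 0.7, "vgain": 0.4},
    {"func_name": "fliplr", "prob": 0.5},
    {"func_name": "label_norm", "prob": 1.0, "xyxy2xywh_": True},                        # pixel ltrb -> normalized xywh
    {"func_name": "label_pad", "prob": 1.0, "padding_size": 160, "padding_value": -1},   # fixed-size labels
    {"func_name": "image_norm", "prob": 1.0, "scale": 255.0},
    {"func_name": "image_transpose", "prob": 1.0, "bgr2rgb": True, "hwc2chw": True},
]
```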
Source code in mindyolo/data/dataset.py
class COCODataset:
+    """
+    Load the COCO dataset (yolo format coco labels)
+
+    Args:
+        dataset_path (str): dataset label directory for dataset.
+        for example:
+            COCO_ROOT
+                ├── train2017.txt
+                ├── annotations
+                │     └── instances_train2017.json
+                ├── images
+                │     └── train2017
+                │             ├── 000000000001.jpg
+                │             └── 000000000002.jpg
+                └── labels
+                      └── train2017
+                              ├── 000000000001.txt
+                              └── 000000000002.txt
+            dataset_path (str): ./coco/train2017.txt
+        transforms (list): A list of images data enhancements
+            that apply data enhancements on data set objects in order.
+    """
+
+    def __init__(
+        self,
+        dataset_path="",
+        img_size=640,
+        transforms_dict=None,
+        is_training=False,
+        augment=False,
+        rect=False,
+        single_cls=False,
+        batch_size=32,
+        stride=32,
+        num_cls=80,
+        pad=0.0,
+        return_segments=False,  # for segment
+        return_keypoints=False, # for keypoint
+        nkpt=0,                 # for keypoint
+        ndim=0                  # for keypoint
+    ):
+        # acceptable image suffixes
+        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']
+        self.cache_version = 0.2
+
+        self.return_segments = return_segments
+        self.return_keypoints = return_keypoints
+        assert not (return_segments and return_keypoints), 'Can not return both segments and keypoints.'
+
+        self.path = dataset_path
+        self.img_size = img_size
+        self.augment = augment
+        self.rect = rect
+        self.stride = stride
+        self.num_cls = num_cls
+        self.nkpt = nkpt
+        self.ndim = ndim
+        self.transforms_dict = transforms_dict
+        self.is_training = is_training
+
+        # set column names
+        self.column_names_getitem = ['samples']
+        if self.is_training:
+            self.column_names_collate = ['images', 'labels']
+            if self.return_segments:
+                self.column_names_collate = ['images', 'labels', 'masks']
+            elif self.return_keypoints:
+                self.column_names_collate = ['images', 'labels', 'keypoints']
+        else:
+            self.column_names_collate = ["images", "img_files", "hw_ori", "hw_scale", "pad"]
+
+        try:
+            f = []  # image files
+            for p in self.path if isinstance(self.path, list) else [self.path]:
+                p = Path(p)  # os-agnostic
+                if p.is_dir():  # dir
+                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
+                elif p.is_file():  # file
+                    with open(p, "r") as t:
+                        t = t.read().strip().splitlines()
+                        parent = str(p.parent) + os.sep
+                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
+                else:
+                    raise Exception(f"{p} does not exist")
+            self.img_files = sorted([x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in self.img_formats])
+            assert self.img_files, f"No images found"
+        except Exception as e:
+            raise Exception(f"Error loading data from {self.path}: {e}\n")
+
+        # Check cache
+        self.label_files = self._img2label_paths(self.img_files)  # labels
+        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache.npy")  # cached labels
+        if cache_path.is_file():
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+            if cache["version"] == self.cache_version \
+                    and cache["hash"] == self._get_hash(self.label_files + self.img_files):
+                logger.info(f"Dataset Cache file hash/version check success.")
+                logger.info(f"Load dataset cache from [{cache_path}] success.")
+            else:
+                logger.info(f"Dataset cache file hash/version check fail.")
+                logger.info(f"Dataset caching now...")
+                cache, exists = self.cache_labels(cache_path), False  # cache
+                logger.info(f"Dataset caching success.")
+        else:
+            logger.info(f"No dataset cache available, caching now...")
+            cache, exists = self.cache_labels(cache_path), False  # cache
+            logger.info(f"Dataset caching success.")
+
+        # Display cache
+        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupted, total
+        if exists:
+            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+            tqdm(None, desc=d, total=n, initial=n)  # display cache results
+        assert nf > 0 or not augment, f"No labels in {cache_path}. Can not train without labels."
+
+        # Read cache
+        cache.pop("hash")  # remove hash
+        cache.pop("version")  # remove version
+        self.labels = cache['labels']
+        self.img_files = [lb['im_file'] for lb in self.labels]  # update im_files
+
+        # Check if the dataset is all boxes or all segments
+        lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in self.labels)
+        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
+        if len_segments and len_boxes != len_segments:
+            print(
+                f'WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, '
+                f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. '
+                'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.')
+            for lb in self.labels:
+                lb['segments'] = []
+        if len_cls == 0:
+            raise ValueError(f'All labels empty in {cache_path}, can not start training without labels.')
+
+        if single_cls:
+            for x in self.labels:
+                x['cls'][:, 0] = 0
+
+        n = len(self.labels)  # number of images
+        bi = np.floor(np.arange(n) / batch_size).astype(np.int_)  # batch index
+        nb = bi[-1] + 1  # number of batches
+        self.batch = bi  # batch index of image
+
+        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
+        self.imgs, self.img_hw_ori, self.indices = None, None, range(n)
+
+        # Rectangular Train/Test
+        if self.rect:
+            # Sort by aspect ratio
+            s = self.img_shapes  # wh
+            ar = s[:, 1] / s[:, 0]  # aspect ratio
+            irect = ar.argsort()
+            self.img_files = [self.img_files[i] for i in irect]
+            self.label_files = [self.label_files[i] for i in irect]
+            self.labels = [self.labels[i] for i in irect]
+            self.img_shapes = s[irect]  # wh
+            ar = ar[irect]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int_) * stride
+
+        self.imgIds = [int(Path(im_file).stem) for im_file in self.img_files]
+
+    def cache_labels(self, path=Path("./labels.cache.npy")):
+        # Cache dataset labels, check images and read shapes
+        x = {'labels': []}  # dict
+        nm, nf, ne, nc, segments, keypoints = 0, 0, 0, 0, [], None  # number missing, found, empty, corrupted
+        pbar = tqdm(zip(self.img_files, self.label_files), desc="Scanning images", total=len(self.img_files))
+        if self.return_keypoints and (self.nkpt <= 0 or self.ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
+        for i, (im_file, lb_file) in enumerate(pbar):
+            try:
+                # verify images
+                im = Image.open(im_file)
+                im.verify()  # PIL verify
+                shape = self._exif_size(im)  # image size
+                segments = []  # instance segments
+                assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
+                assert im.format.lower() in self.img_formats, f"invalid image format {im.format}"
+
+                # verify labels
+                if os.path.isfile(lb_file):
+                    nf += 1  # label found
+                    with open(lb_file, "r") as f:
+                        lb = [x.split() for x in f.read().strip().splitlines()]
+                        if any([len(x) > 6 for x in lb]) and (not self.return_keypoints):  # is segment
+                            classes = np.array([x[0] for x in lb], dtype=np.float32)
+                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
+                            lb = np.concatenate(
+                                (classes.reshape(-1, 1), segments2boxes(segments)), 1
+                            )  # (cls, xywh)
+                        lb = np.array(lb, dtype=np.float32)
+                    nl = len(lb)
+                    if nl:
+                        if self.return_keypoints:
+                            assert lb.shape[1] == (5 + self.nkpt * self.ndim), \
+                                f'labels require {(5 + self.nkpt * self.ndim)} columns each'
+                            assert (lb[:, 5::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                            assert (lb[:, 6::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                        else:
+                            assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
+                            assert (lb[:, 1:] <= 1).all(), \
+                                f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
+                            assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
+                        # All labels
+                        max_cls = int(lb[:, 0].max())  # max label count
+                        assert max_cls <= self.num_cls, \
+                            f'Label class {max_cls} exceeds dataset class count {self.num_cls}. ' \
+                            f'Possible class labels are 0-{self.num_cls - 1}'
+                        _, j = np.unique(lb, axis=0, return_index=True)
+                        if len(j) < nl:  # duplicate row check
+                            lb = lb[j]  # remove duplicates
+                            if segments:
+                                segments = [segments[x] for x in j]
+                            print(f'WARNING ⚠️ {im_file}: {nl - len(j)} duplicate labels removed')
+                    else:
+                        ne += 1  # label empty
+                        lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                            if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                else:
+                    nm += 1  # label missing
+                    lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                        if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                if self.return_keypoints:
+                    keypoints = lb[:, 5:].reshape(-1, self.nkpt, self.ndim)
+                    if self.ndim == 2:
+                        kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
+                        kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
+                        kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
+                        keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
+                lb = lb[:, :5]
+                x['labels'].append(
+                    dict(
+                        im_file=im_file,
+                        cls=lb[:, 0:1],     # (n, 1)
+                        bboxes=lb[:, 1:],   # (n, 4)
+                        segments=segments,  # list of (mi, 2)
+                        keypoints=keypoints,
+                        bbox_format='xywhn',
+                        segment_format='polygon'
+                    )
+                )
+            except Exception as e:
+                nc += 1
+                print(f"WARNING: Ignoring corrupted image and/or label {im_file}: {e}")
+
+            pbar.desc = f"Scanning '{path.parent / path.stem}' images and labels... " \
+                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+        pbar.close()
+
+        if nf == 0:
+            print(f"WARNING: No labels found in {path}.")
+
+        x["hash"] = self._get_hash(self.label_files + self.img_files)
+        x["results"] = nf, nm, ne, nc, len(self.img_files)
+        x["version"] = self.cache_version  # cache version
+        np.save(path, x)  # save for next time
+        logger.info(f"New cache created: {path}")
+        return x
+
+    def __getitem__(self, index):
+        sample = self.get_sample(index)
+
+        for _i, ori_trans in enumerate(self.transforms_dict):
+            _trans = ori_trans.copy()
+            func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+            if func_name == 'copy_paste':
+                sample = self.copy_paste(sample, prob)
+            elif random.random() < prob:
+                if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                    self.albumentations = Albumentations(size=self.img_size, **_trans)
+                if func_name == "letterbox":
+                    new_shape = self.img_size if not self.rect else self.batch_shapes[self.batch[index]]
+                    sample = self.letterbox(sample, new_shape, **_trans)
+                else:
+                    sample = getattr(self, func_name)(sample, **_trans)
+
+        sample['img'] = np.ascontiguousarray(sample['img'])
+        return sample
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def get_sample(self, index):
+        """Get and return label information from the dataset."""
+        sample = deepcopy(self.labels[index])
+        if self.imgs is None:
+            path = self.img_files[index]
+            img = cv2.imread(path)  # BGR
+            assert img is not None, "Image Not Found " + path
+            h_ori, w_ori = img.shape[:2]  # orig hw
+            r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+            if r != 1:  # always resize down, only resize up if training with augmentation
+                interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+                img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+            sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+        else:
+            sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+        return sample
+
+    def mosaic(
+        self,
+        sample,
+        mosaic9_prob=0.0,
+        post_transform=None,
+    ):
+        segment_format = sample['segment_format']
+        bbox_format = sample['bbox_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        mosaic9_prob = min(1.0, max(mosaic9_prob, 0.0))
+        if random.random() < (1 - mosaic9_prob):
+            sample = self._mosaic4(sample)
+        else:
+            sample = self._mosaic9(sample)
+
+        if post_transform:
+            for _i, ori_trans in enumerate(post_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                sample = getattr(self, func_name)(sample, **_trans)
+
+        return sample
+
+    def _mosaic4(self, sample):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=3)  # 3 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # box and cls
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        sample['img'] = img4
+        sample['cls'] = classes4
+        sample['bboxes'] = bboxes4
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments4
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def _mosaic9(self, sample):
+        # loads images in a 9-mosaic
+        classes9, bboxes9, segments9 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=8)  # 8 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img9
+            if i == 0:  # center
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
+                h0, w0 = h, w
+                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
+            elif i == 1:  # top
+                c = s, s - h, s + w, s
+            elif i == 2:  # top right
+                c = s + wp, s - h, s + wp + w, s
+            elif i == 3:  # right
+                c = s + w0, s, s + w0 + w, s + h
+            elif i == 4:  # bottom right
+                c = s + w0, s + hp, s + w0 + w, s + hp + h
+            elif i == 5:  # bottom
+                c = s + w0 - w, s + h0, s + w0, s + h0 + h
+            elif i == 6:  # bottom left
+                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
+            elif i == 7:  # left
+                c = s - w, s + h0 - h, s, s + h0
+            elif i == 8:  # top left
+                c = s - w, s + h0 - hp - h, s, s + h0 - hp
+
+            padx, pady = c[:2]
+            x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords
+
+            # box and cls
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padx, pady)  # normalized xywh to pixel xyxy format
+            classes9.append(cls)
+            bboxes9.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
+                segments9.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padx, pady)
+                segments9.append(segments)
+
+            # Image
+            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
+            hp, wp = h, w  # height, width previous
+
+        # Offset
+        yc, xc = [int(random.uniform(0, s)) for _ in mosaic_border]  # mosaic center x, y
+        img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s]
+
+        # Concat/clip labels
+        classes9 = np.concatenate(classes9, 0)
+        bboxes9 = np.concatenate(bboxes9, 0)
+        bboxes9[:, [0, 2]] -= xc
+        bboxes9[:, [1, 3]] -= yc
+        bboxes9 = bboxes9.clip(0, 2 * s)
+
+        if segments_is_list:
+            c = np.array([xc, yc])  # centers
+            segments9 = [x - c for x in segments9]
+            for x in segments9:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments9 = np.concatenate(segments9, 0)
+            segments9[..., 0] -= xc
+            segments9[..., 1] -= yc
+            segments9 = segments9.clip(0, 2 * s)
+
+        sample['img'] = img9
+        sample['cls'] = classes9
+        sample['bboxes'] = bboxes9
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments9
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def resample_segments(self, sample, n=1000):
+        segment_format = sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        segments = sample['segments']
+        if len(segments) > 0:
+            # Up-sample an (n,2) segment
+            for i, s in enumerate(segments):
+                s = np.concatenate((s, s[0:1, :]), axis=0)
+                x = np.linspace(0, len(s) - 1, n)
+                xp = np.arange(len(s))
+                segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy
+            segments = np.stack(segments, axis=0)
+        else:
+            segments = np.zeros((0, 1000, 2), dtype=np.float32)
+        sample['segments'] = segments
+        return sample
+
+    def copy_paste(self, sample, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        segments = sample['segments']
+
+        n = len(segments)
+        if probability and n:
+            h, w, _ = img.shape  # height, width, channels
+            im_new = np.zeros(img.shape, np.uint8)
+            for j in random.sample(range(n), k=round(probability * n)):
+                c, l, s = cls[j], bboxes[j], segments[j]
+                box = w - l[2], l[1], w - l[0], l[3]
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+                if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                    cls = np.concatenate((cls, [c]), 0)
+                    bboxes = np.concatenate((bboxes, [box]), 0)
+                    if isinstance(segments, list):
+                        segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                    else:
+                        segments = np.concatenate((segments, [np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)]), 0)
+                    cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+
+            result = cv2.bitwise_and(src1=img, src2=im_new)
+            result = cv2.flip(result, 1)  # augment segments (flip left-right)
+            i = result > 0  # pixels to replace
+            img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+
+        sample['img'] = img
+        sample['cls'] = cls
+        sample['bboxes'] = bboxes
+        sample['segments'] = segments
+
+        return sample
+
+    def random_perspective(
+            self, sample, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0)
+    ):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        targets = sample['bboxes']
+        segments = sample['segments']
+        assert isinstance(segments, np.ndarray), f"segments type expect numpy.ndarray, but got {type(segments)}; " \
+                                                 f"maybe you should resample_segments before that."
+
+        border = sample.pop('mosaic_border', border)
+        height = img.shape[0] + border[0] * 2  # shape(h,w,c)
+        width = img.shape[1] + border[1] * 2
+
+        # Center
+        C = np.eye(3)
+        C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
+        C[1, 2] = -img.shape[0] / 2  # y translation (pixels)
+
+        # Perspective
+        P = np.eye(3)
+        P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+        P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+
+        # Rotation and Scale
+        R = np.eye(3)
+        a = random.uniform(-degrees, degrees)
+        s = random.uniform(1 - scale, 1 + scale)
+        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+        # Shear
+        S = np.eye(3)
+        S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+        S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+
+        # Translation
+        T = np.eye(3)
+        T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
+        T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
+
+        # Combined rotation matrix
+        M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+        if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+            if perspective:
+                img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
+            else:  # affine
+                img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+        # Transform label coordinates
+        n = len(targets)
+        if n:
+            use_segments = len(segments)
+            new_bboxes = np.zeros((n, 4))
+            if use_segments:  # warp segments
+                point_num = segments[0].shape[0]
+                new_segments = np.zeros((n, point_num, 2))
+                for i, segment in enumerate(segments):
+                    xy = np.ones((len(segment), 3))
+                    xy[:, :2] = segment
+                    xy = xy @ M.T  # transform
+                    xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+
+                    # clip
+                    new_segments[i] = xy
+                    new_bboxes[i] = segment2box(xy, width, height)
+
+            else:  # warp boxes
+                xy = np.ones((n * 4, 3))
+                xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+                xy = xy @ M.T  # transform
+                xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+
+                # create new boxes
+                x = xy[:, [0, 2, 4, 6]]
+                y = xy[:, [1, 3, 5, 7]]
+                new_bboxes = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+                # clip
+                new_bboxes[:, [0, 2]] = new_bboxes[:, [0, 2]].clip(0, width)
+                new_bboxes[:, [1, 3]] = new_bboxes[:, [1, 3]].clip(0, height)
+
+            # filter candidates
+            i = box_candidates(box1=targets.T * s, box2=new_bboxes.T, area_thr=0.01 if use_segments else 0.10)
+
+            cls = cls[i]
+            targets = new_bboxes[i]
+            sample['cls'] = cls
+            sample['bboxes'] = targets
+            if use_segments:
+                sample['segments'] = segments[i]
+
+        sample['img'] = img
+
+        return sample
+
+    def mixup(self, sample, alpha=32.0, beta=32.0, pre_transform=None):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        index = random.choices(self.indices, k=1)[0]
+        sample2 = self.get_sample(index)
+        if pre_transform:
+            for _i, ori_trans in enumerate(pre_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                if func_name == 'copy_paste':
+                    sample2 = self.copy_paste(sample2, prob)
+                elif random.random() < prob:
+                    if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                        self.albumentations = Albumentations(size=self.img_size, **_trans)
+                    sample2 = getattr(self, func_name)(sample2, **_trans)
+
+        assert isinstance(sample['segments'], np.ndarray), \
+            f"MixUp: sample segments type expect numpy.ndarray, but got {type(sample['segments'])}; " \
+            f"maybe you should resample_segments before that."
+        assert isinstance(sample2['segments'], np.ndarray), \
+            f"MixUp: sample2 segments type expect numpy.ndarray, but got {type(sample2['segments'])}; " \
+            f"maybe you should add resample_segments in pre_transform."
+
+        image, image2 = sample['img'], sample2['img']
+        r = np.random.beta(alpha, beta)  # mixup ratio sampled from Beta(alpha, beta)
+        image = (image * r + image2 * (1 - r)).astype(np.uint8)
+
+        sample['img'] = image
+        sample['cls'] = np.concatenate((sample['cls'], sample2['cls']), 0)
+        sample['bboxes'] = np.concatenate((sample['bboxes'], sample2['bboxes']), 0)
+        sample['segments'] = np.concatenate((sample['segments'], sample2['segments']), 0)
+        return sample
+
+    def pastein(self, sample, num_sample=30):
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert not self.return_segments, "pastein currently does not support seg data."
+        assert not self.return_keypoints, "pastein currently does not support keypoint data."
+        sample.pop('segments', None)
+        sample.pop('keypoints', None)
+
+        image = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        # load sample
+        sample_labels, sample_images, sample_masks = [], [], []
+        while len(sample_labels) < num_sample:
+            sample_labels_, sample_images_, sample_masks_ = self._pastin_load_samples()
+            sample_labels += sample_labels_
+            sample_images += sample_images_
+            sample_masks += sample_masks_
+            if len(sample_labels) == 0:
+                break
+
+        # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
+        h, w = image.shape[:2]
+
+        # create random masks
+        scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6  # image size fraction
+        for s in scales:
+            if random.random() < 0.2:
+                continue
+            mask_h = random.randint(1, int(h * s))
+            mask_w = random.randint(1, int(w * s))
+
+            # box
+            xmin = max(0, random.randint(0, w) - mask_w // 2)
+            ymin = max(0, random.randint(0, h) - mask_h // 2)
+            xmax = min(w, xmin + mask_w)
+            ymax = min(h, ymin + mask_h)
+
+            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+            if len(bboxes):
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+            else:
+                ioa = np.zeros(1)
+
+            if (
+                    (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin + 20) and (ymax > ymin + 20)
+            ):  # allow 30% obscuration of existing labels
+                sel_ind = random.randint(0, len(sample_labels) - 1)
+                hs, ws, cs = sample_images[sel_ind].shape
+                r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws)
+                r_w = int(ws * r_scale)
+                r_h = int(hs * r_scale)
+
+                if (r_w > 10) and (r_h > 10):
+                    r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))
+                    r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))
+                    temp_crop = image[ymin: ymin + r_h, xmin: xmin + r_w]
+                    m_ind = r_mask > 0
+                    if m_ind.astype(np.int_).sum() > 60:
+                        temp_crop[m_ind] = r_image[m_ind]
+                        box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], dtype=np.float32)
+                        if len(bboxes):
+                            cls = np.concatenate((cls, [[sample_labels[sel_ind]]]), 0)
+                            bboxes = np.concatenate((bboxes, [box]), 0)
+                        else:
+                            cls = np.array([[sample_labels[sel_ind]]])
+                            bboxes = np.array([box])
+
+                        image[ymin: ymin + r_h, xmin: xmin + r_w] = temp_crop  # Modify on the original image
+
+        sample['img'] = image
+        sample['bboxes'] = bboxes
+        sample['cls'] = cls
+        return sample
+
+    def _pastin_load_samples(self):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = []
+        indices = random.choices(self.indices, k=4)  # 4 image indices
+        mosaic_samples += [self.get_sample(i) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, sample in enumerate(mosaic_samples):
+            # Load image
+            img = sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # Labels
+            cls, bboxes = sample['cls'], sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            segments = sample['segments']
+            segments_is_list = isinstance(segments, list)
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        # Concat/clip labels
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        # Augment
+        sample_labels, sample_images, sample_masks = \
+            self._pastin_sample_segments(img4, classes4, bboxes4, segments4, probability=0.5)
+
+        return sample_labels, sample_images, sample_masks
+
+    def _pastin_sample_segments(self, img, classes, bboxes, segments, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        n = len(segments)
+        sample_labels = []
+        sample_images = []
+        sample_masks = []
+        if probability and n:
+            h, w, c = img.shape  # height, width, channels
+            for j in random.sample(range(n), k=round(probability * n)):
+                cls, l, s = classes[j], bboxes[j], segments[j]
+                box = (
+                    l[0].astype(int).clip(0, w - 1),
+                    l[1].astype(int).clip(0, h - 1),
+                    l[2].astype(int).clip(0, w - 1),
+                    l[3].astype(int).clip(0, h - 1),
+                )
+
+                if (box[2] <= box[0]) or (box[3] <= box[1]):
+                    continue
+
+                sample_labels.append(cls[0])
+
+                mask = np.zeros(img.shape, np.uint8)
+
+                cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+                sample_masks.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+                result = cv2.bitwise_and(src1=img, src2=mask)
+                i = result > 0  # pixels to replace
+                mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+                sample_images.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+        return sample_labels, sample_images, sample_masks
+
+    def hsv_augment(self, sample, hgain=0.5, sgain=0.5, vgain=0.5):
+        image = sample['img']
+        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
+        hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
+        dtype = image.dtype  # uint8
+
+        x = np.arange(0, 256, dtype=np.int16)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
+        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)  # Modify on the original image
+
+        sample['img'] = image
+        return sample
+
+    def fliplr(self, sample):
+        # flip image left-right
+        image = sample['img']
+        image = np.fliplr(image)
+        sample['img'] = image
+
+        # flip box
+        _, w = image.shape[:2]
+        bboxes, bbox_format = sample['bboxes'], sample['bbox_format']
+        if bbox_format == "ltrb":
+            if len(bboxes):
+                x1 = bboxes[:, 0].copy()
+                x2 = bboxes[:, 2].copy()
+                bboxes[:, 0] = w - x2
+                bboxes[:, 2] = w - x1
+        elif bbox_format == "xywhn":
+            if len(bboxes):
+                bboxes[:, 0] = 1 - bboxes[:, 0]
+        else:
+            raise NotImplementedError
+        sample['bboxes'] = bboxes
+
+        # flip seg
+        if self.return_segments:
+            segment_format, segments = sample['segment_format'], sample['segments']
+            assert segment_format == 'polygon', \
+                f'FlipLR: The segment format should be polygon, but got {segment_format}'
+            assert isinstance(segments, np.ndarray), \
+                f"FlipLR: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                f"maybe you should resample_segments before that."
+
+            if len(segments):
+                segments[..., 0] = w - segments[..., 0]
+
+            sample['segments'] = segments
+
+        return sample
+
+    def letterbox(self, sample, new_shape=None, xywhn2xyxy_=True, scaleup=False, only_image=False, color=(114, 114, 114)):
+        # Resize and pad image while meeting stride-multiple constraints
+        if sample['bbox_format'] == 'ltrb':
+            xywhn2xyxy_ = False
+
+        if not new_shape:
+            new_shape = self.img_size
+
+        if isinstance(new_shape, int):
+            new_shape = (new_shape, new_shape)
+
+        image = sample['img']
+        shape = image.shape[:2]  # current shape [height, width]
+
+        h, w = shape[:]
+        ori_shape = sample['ori_shape']
+        h0, w0 = ori_shape
+        hw_scale = np.array([h / h0, w / w0])
+        sample['hw_scale'] = hw_scale
+
+        # Scale ratio (new / old)
+        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+        if not scaleup:  # only scale down, do not scale up (for better test mAP)
+            r = min(r, 1.0)
+
+        # Compute padding
+        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+        dw /= 2  # divide padding into 2 sides
+        dh /= 2
+        hw_pad = np.array([dh, dw])
+
+        if shape != new_shape:
+            if shape[::-1] != new_unpad:  # resize
+                image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
+            top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+            image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+            sample['hw_pad'] = hw_pad
+        else:
+            sample['hw_pad'] = np.array([0., 0.])
+        bboxes = sample['bboxes']
+        if not only_image:
+            # convert bboxes
+            if len(bboxes):
+                if xywhn2xyxy_:
+                    bboxes = xywhn2xyxy(bboxes, r * w, r * h, padw=dw, padh=dh)
+                else:
+                    bboxes *= r
+                    bboxes[:, [0, 2]] += dw
+                    bboxes[:, [1, 3]] += dh
+                sample['bboxes'] = bboxes
+            sample['bbox_format'] = 'ltrb'
+
+            # convert segments
+            if 'segments' in sample:
+                segments, segment_format = sample['segments'], sample['segment_format']
+                assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+                if len(segments):
+                    if isinstance(segments, np.ndarray):
+                        if xywhn2xyxy_:
+                            segments[..., 0] *= w
+                            segments[..., 1] *= h
+                        else:
+                            segments *= r
+                        segments[..., 0] += dw
+                        segments[..., 1] += dh
+                    elif isinstance(segments, list):
+                        for segment in segments:
+                            if xywhn2xyxy_:
+                                segment[..., 0] *= w
+                                segment[..., 1] *= h
+                            else:
+                                segment *= r
+                            segment[..., 0] += dw
+                            segment[..., 1] += dh
+                    sample['segments'] = segments
+
+        sample['img'] = image
+        return sample
+
+    def label_norm(self, sample, xyxy2xywh_=True):
+        bbox_format = sample['bbox_format']
+        if bbox_format == "xywhn":
+            return sample
+
+        bboxes = sample['bboxes']
+        if len(bboxes) == 0:
+            sample['bbox_format'] = 'xywhn'
+            return sample
+
+        if xyxy2xywh_:
+            bboxes = xyxy2xywh(bboxes)  # convert xyxy to xywh
+        height, width = sample['img'].shape[:2]
+        bboxes[:, [1, 3]] /= height  # normalized height 0-1
+        bboxes[:, [0, 2]] /= width  # normalized width 0-1
+        sample['bboxes'] = bboxes
+        sample['bbox_format'] = 'xywhn'
+
+        return sample
+
+    def label_pad(self, sample, padding_size=160, padding_value=-1):
+        # create fixed label, avoid dynamic shape problem.
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        cls, bboxes = sample['cls'], sample['bboxes']
+        cls_pad = np.full((padding_size, 1), padding_value, dtype=np.float32)
+        bboxes_pad = np.full((padding_size, 4), padding_value, dtype=np.float32)
+        nL = len(bboxes)
+        if nL:
+            cls_pad[:min(nL, padding_size)] = cls[:min(nL, padding_size)]
+            bboxes_pad[:min(nL, padding_size)] = bboxes[:min(nL, padding_size)]
+        sample['cls'] = cls_pad
+        sample['bboxes'] = bboxes_pad
+
+        if "segments" in sample:
+            if sample['segment_format'] == "mask":
+                segments = sample['segments']
+                assert isinstance(segments, np.ndarray), \
+                    f"Label Pad: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                    f"maybe you should resample_segments before that."
+                assert nL == segments.shape[0], f"Label Pad: segments len not equal bboxes"
+                h, w = segments.shape[1:]
+                segments_pad = np.full((padding_size, h, w), padding_value, dtype=np.float32)
+                segments_pad[:min(nL, padding_size)] = segments[:min(nL, padding_size)]
+                sample['segments'] = segments_pad
+
+        return sample
+
+    def image_norm(self, sample, scale=255.0):
+        image = sample['img']
+        image = image.astype(np.float32, copy=False)
+        image /= scale
+        sample['img'] = image
+        return sample
+
+    def image_transpose(self, sample, bgr2rgb=True, hwc2chw=True):
+        image = sample['img']
+        if bgr2rgb:
+            image = image[:, :, ::-1]
+        if hwc2chw:
+            image = image.transpose(2, 0, 1)
+        sample['img'] = image
+        return sample
+
+    def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+        """convert polygon points to bitmap."""
+        segments, segment_format = sample['segments'], sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert isinstance(segments, np.ndarray), \
+            f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+            f"maybe you should resample_segments before that."
+
+        h, w = sample['img'].shape[:2]
+        if mask_overlap:
+            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+            sample['cls'] = sample['cls'][sorted_idx]
+            sample['bboxes'] = sample['bboxes'][sorted_idx]
+            sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+            sample['segment_format'] = 'overlap'
+        else:
+            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+            sample['segments'] = masks
+            sample['segment_format'] = 'mask'
+
+        return sample
+
+    def _img2label_paths(self, img_paths):
+        # Define label paths as a function of image paths
+        sa, sb = os.sep + "images" + os.sep, os.sep + "labels" + os.sep  # /images/, /labels/ substrings
+        return ["txt".join(x.replace(sa, sb, 1).rsplit(x.split(".")[-1], 1)) for x in img_paths]
+
+    def _get_hash(self, paths):
+        # Returns a single hash value of a list of paths (files or dirs)
+        size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+        h = hashlib.md5(str(size).encode())  # hash sizes
+        h.update("".join(paths).encode())  # hash paths
+        return h.hexdigest()  # return hash
+
+    def _exif_size(self, img):
+        # Returns exif-corrected PIL size
+        s = img.size  # (width, height)
+        try:
+            rotation = dict(img._getexif().items())[orientation]
+            if rotation == 6:  # rotation 270
+                s = (s[1], s[0])
+            elif rotation == 8:  # rotation 90
+                s = (s[1], s[0])
+        except:
+            pass
+
+        return s
+
+    def train_collate_fn(self, batch_samples, batch_info):
+        imgs = [sample.pop('img') for sample in batch_samples]
+        labels = []
+        for i, sample in enumerate(batch_samples):
+            cls, bboxes = sample.pop('cls'), sample.pop('bboxes')
+            labels.append(np.concatenate((np.full_like(cls, i), cls, bboxes), axis=-1))
+        return_items = [np.stack(imgs, 0), np.stack(labels, 0)]
+
+        if self.return_segments:
+            masks = [sample.pop('segments', None) for sample in batch_samples]
+            return_items.append(np.stack(masks, 0))
+        if self.return_keypoints:
+            keypoints = [sample.pop('keypoints', None) for sample in batch_samples]
+            return_items.append(np.stack(keypoints, 0))
+
+        return tuple(return_items)
+
+    def test_collate_fn(self, batch_samples, batch_info):
+        imgs = [sample.pop('img') for sample in batch_samples]
+        path = [sample.pop('im_file') for sample in batch_samples]
+        hw_ori = [sample.pop('ori_shape') for sample in batch_samples]
+        hw_scale = [sample.pop('hw_scale') for sample in batch_samples]
+        pad = [sample.pop('hw_pad') for sample in batch_samples]
+        return (
+            np.stack(imgs, 0),
+            path,
+            np.stack(hw_ori, 0),
+            np.stack(hw_scale, 0),
+            np.stack(pad, 0),
+        )
+
+
mindyolo.data.dataset.COCODataset.get_sample(index)

Get and return label information from the dataset.

Source code in mindyolo/data/dataset.py
def get_sample(self, index):
+    """Get and return label information from the dataset."""
+    sample = deepcopy(self.labels[index])
+    if self.imgs is None:
+        path = self.img_files[index]
+        img = cv2.imread(path)  # BGR
+        assert img is not None, "Image Not Found " + path
+        h_ori, w_ori = img.shape[:2]  # orig hw
+        r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+        if r != 1:  # always resize down, only resize up if training with augmentation
+            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+            img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+        sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+    else:
+        sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+    return sample
+
+
+
mindyolo.data.dataset.COCODataset.segment_poly2mask(sample, mask_overlap, mask_ratio)

convert polygon points to bitmap.

Source code in mindyolo/data/dataset.py
def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+    """convert polygon points to bitmap."""
+    segments, segment_format = sample['segments'], sample['segment_format']
+    assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+    assert isinstance(segments, np.ndarray), \
+        f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+        f"maybe you should resample_segments before that."
+
+    h, w = sample['img'].shape[:2]
+    if mask_overlap:
+        masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+        sample['cls'] = sample['cls'][sorted_idx]
+        sample['bboxes'] = sample['bboxes'][sorted_idx]
+        sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+        sample['segment_format'] = 'overlap'
+    else:
+        masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+        sample['segments'] = masks
+        sample['segment_format'] = 'mask'
+
+    return sample
+
+
+
Albumentations

mindyolo.data.albumentations.Albumentations

Source code in mindyolo/data/albumentations.py
class Albumentations:
+    # Implement Albumentations augmentation https://github.com/ultralytics/yolov5
+    # YOLOv5 Albumentations class (optional, only used if package is installed)
+    def __init__(self, size=640, random_resized_crop=True, **kwargs):
+        self.transform = None
+        prefix = _colorstr("albumentations: ")
+        try:
+            import albumentations as A
+
+            _check_version(A.__version__, "1.0.3", hard=True)  # version requirement
+            T = []
+            if random_resized_crop:
+                T.extend([
+                    A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
+                ])
+            T.extend([
+                A.Blur(p=0.01),
+                A.MedianBlur(p=0.01),
+                A.ToGray(p=0.01),
+                A.CLAHE(p=0.01),
+                A.RandomBrightnessContrast(p=0.0),
+                A.RandomGamma(p=0.0),
+                A.ImageCompression(quality_lower=75, p=0.0),
+            ])
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
+
+            print(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p), flush=True)
+            print("[INFO] albumentations load success", flush=True)
+        except ImportError:  # package not installed, skip
+            pass
+            print("[WARNING] package not installed, albumentations load failed", flush=True)
+        except Exception as e:
+            print(f"{prefix}{e}", flush=True)
+            print("[WARNING] albumentations load failed", flush=True)
+
+    def __call__(self, sample, p=1.0, **kwargs):
+        if self.transform and random.random() < p:
+            im, bboxes, cls, bbox_format = sample['img'], sample['bboxes'], sample['cls'], sample['bbox_format']
+            assert bbox_format in ("ltrb", "xywhn")
+            if bbox_format == "ltrb" and bboxes.shape[0] > 0:
+                h, w = im.shape[:2]
+                bboxes = xyxy2xywh(bboxes)
+                bboxes[:, [0, 2]] /= w
+                bboxes[:, [1, 3]] /= h
+
+            new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
+
+            sample['img'] = new['image']
+            sample['bboxes'] = np.array(new['bboxes'])
+            sample['cls'] = np.array(new['class_labels'])
+            sample['bbox_format'] = "xywhn"
+
+        return sample
+
+
\ No newline at end of file
diff --git a/en/reference/models/index.html b/en/reference/models/index.html
new file mode 100644
index 00000000..921219fb
--- /dev/null
+++ b/en/reference/models/index.html
@@ -0,0 +1,1709 @@

Models - MindYOLO Docs

Models

+

Create Model

mindyolo.models.model_factory.create_model(model_name, model_cfg=None, in_channels=3, num_classes=80, checkpoint_path='', **kwargs)

Source code in mindyolo/models/model_factory.py
def create_model(
+    model_name: str,
+    model_cfg: dict = None,
+    in_channels: int = 3,
+    num_classes: int = 80,
+    checkpoint_path: str = "",
+    **kwargs,
+):
+    model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith(
+            ".ckpt"
+        ), f"[{checkpoint_path}] not a ckpt file."
+        checkpoint_param = load_checkpoint(checkpoint_path)
+        load_param_into_net(model, checkpoint_param)
+        logger.info(f"Load checkpoint from [{checkpoint_path}] success.")
+
+    return model
+
+
+
+ +
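
For reference, a minimal usage sketch follows. It assumes a plain dict parsed with pyyaml is acceptable as model_cfg; in practice MindYOLO's train/test scripts build this configuration (including __BASE__ resolution) with their own config loader, so treat this only as an illustration of the call signature, not the official API usage.

import yaml
from mindyolo.models.model_factory import create_model

# Read the `network` section of a model yaml (simplified; __BASE__ inheritance is ignored here).
with open("configs/yolov7/yolov7.yaml", "r") as f:
    cfg = yaml.safe_load(f)

net = create_model(
    model_name="yolov7",        # must match a name registered via @register_model
    model_cfg=cfg["network"],   # backbone/head description used to build the network
    num_classes=80,
    checkpoint_path="",         # optionally a local .ckpt file to load
)
net.set_train(False)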

YOLOV3

mindyolo.models.yolov3(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov3 model.

Source code in mindyolo/models/yolov3.py
@register_model
+def yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3:
+    """Get yolov3 model."""
+    model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV4

mindyolo.models.yolov4(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov4 model.

Source code in mindyolo/models/yolov4.py
@register_model
+def yolov4(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv4:
+    """Get yolov4 model."""
+    model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV5

mindyolo.models.yolov5(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov5 model.

Source code in mindyolo/models/yolov5.py
@register_model
+def yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5:
+    """Get yolov5 model."""
+    model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV7

mindyolo.models.yolov7(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov7 model.

Source code in mindyolo/models/yolov7.py
@register_model
+def yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7:
+    """Get yolov7 model."""
+    model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV8

mindyolo.models.yolov8(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov8 model.

Source code in mindyolo/models/yolov8.py
@register_model
+def yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8:
+    """Get yolov8 model."""
+    model = YOLOv8(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOX

mindyolo.models.yolox(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolox model.

Source code in mindyolo/models/yolox.py
@register_model
+def yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX:
+    """Get yolox model."""
+    model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +
\ No newline at end of file
diff --git a/en/tutorials/configuration/index.html b/en/tutorials/configuration/index.html
new file mode 100644
index 00000000..7bbccf9d
--- /dev/null
+++ b/en/tutorials/configuration/index.html
@@ -0,0 +1,1730 @@

Configuration - MindYOLO Docs

Configuration

+

MindYOLO supports parsing parameters from both yaml files and the command line. Parameters that are fixed, complex, closely tied to the model structure, or nested are kept in yaml files, while simpler parameters, or those that vary from run to run, can be passed on the command line.

+

The following takes yolov3 as an example to explain how to configure the corresponding parameters.

+

Parameter Inheritance Relationship

+

The parameter priority, from high to low, is listed below. When the same parameter name appears at several levels, the higher-priority value overwrites the lower-priority one; a minimal merging sketch follows the list.

+
    +
  • Parameters inputted with user command lines
  • Default parameters set in parser from .py files
  • Parameters in yaml files specified by user command lines
  • Parameters in yaml files set by __BASE__ contained in yaml files specified by user command lines. Take yolov3 as an example, it contains:
    __BASE__: [
      '../coco.yaml',
      './hyp.scratch.yaml',
    ]
+
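
A minimal sketch of this merging logic (not MindYOLO's actual loader) is shown below: base yaml files are loaded first, and lower-priority values are overwritten by higher-priority ones.

import os
import yaml

def deep_merge(low, high):
    """Return a dict where values in `high` override values in `low`."""
    out = dict(low)
    for k, v in high.items():
        if isinstance(v, dict) and isinstance(out.get(k), dict):
            out[k] = deep_merge(out[k], v)
        else:
            out[k] = v
    return out

def load_yaml_with_base(path):
    """Recursively merge a yaml file with the files listed in its __BASE__ key."""
    with open(path, "r") as f:
        cfg = yaml.safe_load(f) or {}
    merged = {}
    for base in cfg.pop("__BASE__", []):
        merged = deep_merge(merged, load_yaml_with_base(os.path.join(os.path.dirname(path), base)))
    return deep_merge(merged, cfg)  # keys in the child file win over its bases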

Basic Parameters

+

Parameter Description

+
    +
  • device_target: device used, Ascend/GPU/CPU
  • +
  • save_dir: the path to save the running results, the default is ./runs
  • +
  • log_interval: step interval to print logs, the default is 100
  • +
  • is_parallel: whether to perform distributed training, the default is False
  • +
  • ms_mode: whether to use static graph mode (0) or dynamic graph mode (1), the default is 0.
  • +
  • config: yaml configuration file path
  • +
  • per_batch_size: batch size of each card, default is 32
  • +
  • epochs: number of training epochs, default is 300
  • +
  • ...
  • +
+

Parse parameter settings

+

This part of the parameters is usually passed in from the command line. Examples are as follows:

+
mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True --log_interval 50
+
+

Dataset

+

Parameter Description

+
    +
  • dataset_name: dataset name
  • train_set: the path where the training set is located
  • val_set: the path where the validation set is located
  • test_set: the path where the test set is located
  • nc: number of categories in the dataset
  • names: list of category names
  • ...
+

Yaml file sample

+

This part of the parameters is defined in configs/coco.yaml, and the data set path usually needs to be modified.

+
data:
+  dataset_name: coco
+
+  train_set: ./coco/train2017.txt  # 118287 images
+  val_set: ./coco/val2017.txt  # 5000 images
+  test_set: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+  nc: 80
+
+  # class names
+  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+           'hair drier', 'toothbrush' ]
+
+

Data Augmentation

+

Parameter Description

+
    +
  • num_parallel_workers: number of worker processes reading data
  • train_transforms: data augmentation applied during training
  • test_transforms: data augmentation applied during validation/testing
  • ...
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml, where train_transforms and test_transforms are lists of dictionaries. Each dictionary contains the name of a data augmentation operation, the probability of applying it, and the parameters of that operation; a sketch of how such a pipeline is dispatched follows the example below.

+
data:
+  num_parallel_workers: 4
+
+  train_transforms:
+    - { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.9 }
+    - { func_name: mixup, prob: 0.1, alpha: 8.0, beta: 8.0, needed_mosaic: True }
+    - { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: albumentations }
+    - { func_name: fliplr, prob: 0.5 }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+  test_transforms:
+    - { func_name: letterbox, scaleup: False }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+
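
Conceptually, each dictionary in these lists is dispatched to the dataset method of the same name. The following is a simplified sketch of that dispatch loop, not the exact MindYOLO implementation (some transforms take extra arguments or handle prob themselves):

import random

def apply_transforms(dataset, sample, transforms):
    """Apply a list of {func_name: ..., **kwargs} configs to one sample, in order."""
    for trans in transforms:
        trans = dict(trans)                 # copy so pop() does not mutate the config
        func_name = trans.pop("func_name")
        prob = trans.pop("prob", 1.0)       # execution probability, default 1.0
        if random.random() < prob:
            sample = getattr(dataset, func_name)(sample, **trans)
    return sample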

Model

+

Parameter Description

+
    +
  • model_name: model name
  • +
  • depth_multiple: model depth factor
  • +
  • width_multiple: model width factor
  • +
  • stride: feature map downsampling multiple
  • +
  • anchors: default anchor box
  • +
  • backbone: model backbone network
  • +
  • head: model detection head
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/yolov3.yaml. The network is constructed from the backbone and head parameters, which are given as a nested list: each row describes one layer module with 4 entries, namely the index of its input layer (-1 means the previous layer), the number of times the module is repeated, the module name, and the module's arguments. A sketch of how one row can be interpreted follows the example below. Users can also define and register networks directly in Python files instead of using yaml files.

network:
+  model_name: yolov3
+
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [8, 16, 32]
+  anchors:
+    - [10,13, 16,30, 33,23]  # P3/8
+    - [30,61, 62,45, 59,119]  # P4/16
+    - [116,90, 156,198, 373,326]  # P5/32
+
+  # darknet53 backbone
+  backbone:
+    # [from, number, module, args]
+    [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+     [-1, 1, ConvNormAct, [512, 3, 2]],  # 7-P4/16
+     [-1, 8, Bottleneck, [512]],
+     [-1, 1, ConvNormAct, [1024, 3, 2]],  # 9-P5/32
+     [-1, 4, Bottleneck, [1024]],  # 10
+    ]
+
+  # YOLOv3 head
+  head:
+    [[-1, 1, Bottleneck, [1024, False]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],  # 15 (P5/32-large)
+
+     [-2, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 8], 1, Concat, [1]],  # cat backbone P4
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, ConvNormAct, [512, 3, 1]],  # 22 (P4/16-medium)
+
+     [-2, 1, ConvNormAct, [128, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 6], 1, Concat, [1]],  # cat backbone P3
+     [-1, 1, Bottleneck, [256, False]],
+     [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
+
+     [[27, 22, 15], 1, YOLOv3Head, [nc, anchors, stride]],   # Detect(P3, P4, P5)
+    ]
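
For illustration, one row of this nested list could be interpreted as sketched below. This is not the real builder: the code in mindyolo/models additionally handles the width multiple, channel inference and output bookkeeping, and MODULE_REGISTRY here is a placeholder for the registered module classes.

MODULE_REGISTRY = {}  # placeholder, e.g. {"ConvNormAct": ConvNormAct, "Bottleneck": Bottleneck, ...}

def build_layer(spec, depth_multiple=1.0):
    """Interpret one [from, number, module, args] row, e.g. [-1, 8, "Bottleneck", [256]]."""
    from_idx, number, module_name, args = spec
    if number > 1:
        number = max(round(number * depth_multiple), 1)   # scale repeat count by the depth factor
    blocks = [MODULE_REGISTRY[module_name](*args) for _ in range(number)]
    return from_idx, blocks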
+

+

Loss function

+

Parameter Description

+
    +
  • name: loss function name
  • +
  • box: box loss weight
  • +
  • cls: class loss weight
  • +
  • cls_pw: class loss positive sample weight
  • +
  • obj: object loss weight
  • +
  • obj_pw: object loss positive sample weight
  • +
  • fl_gamma: focal loss gamma
  • +
  • anchor_t: anchor shape proportion threshold
  • +
  • label_smoothing: label smoothing value
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml

+
loss:
+  name: YOLOv7Loss
+  box: 0.05  # box loss gain
+  cls: 0.5  # cls loss gain
+  cls_pw: 1.0  # cls BCELoss positive_weight
+  obj: 1.0  # obj loss gain (scale with pixels)
+  obj_pw: 1.0  # obj BCELoss positive_weight
+  fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+  anchor_t: 4.0  # anchor-multiple threshold
+  label_smoothing: 0.0 # label smoothing epsilon
+
+

Optimizer

+

Parameter Description

+
    +
  • optimizer: optimizer name.
  • +
  • lr_init: initial value of learning rate
  • +
  • warmup_epochs: number of warmup epochs
  • +
  • warmup_momentum: initial value of warmup momentum
  • +
  • warmup_bias_lr: initial value of warmup bias learning rate
  • +
  • min_warmup_step: minimum number of warmup steps
  • +
  • group_param: parameter grouping strategy
  • +
  • gp_weight_decay: Group parameter weight decay coefficient
  • +
  • start_factor: initial learning rate factor
  • +
  • end_factor: end learning rate factor
  • +
  • momentum: momentum of the moving average
  • +
  • loss_scale: loss scaling coefficient
  • +
  • nesterov: Whether to use the Nesterov Accelerated Gradient (NAG) algorithm to update the gradient.
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml. In the following example, the initial learning rate after the warmup stage is lr_init * start_factor = 0.01 * 1.0 = 0.01, the final learning rate is lr_init * end_factor = 0.01 * 0.01 = 0.0001

+
optimizer:
+  optimizer: momentum
+  lr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+  momentum: 0.937  # SGD momentum/Adam beta1
+  nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm
+  loss_scale: 1.0 # loss scale for optimizer
+  warmup_epochs: 3  # warmup epochs (fractions ok)
+  warmup_momentum: 0.8  # warmup initial momentum
+  warmup_bias_lr: 0.1  # warmup initial bias lr
+  min_warmup_step: 1000 # minimum warmup step
+  group_param: yolov7 # group param strategy
+  gp_weight_decay: 0.0005  # group param weight decay 5e-4
+  start_factor: 1.0
+  end_factor: 0.01
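
As a quick check of this arithmetic, the linear schedule between the two factors can be sketched as follows (whether MindYOLO interpolates per step or per epoch is omitted from this sketch):

def linear_lr(step, total_steps, lr_init=0.01, start_factor=1.0, end_factor=0.01):
    """Interpolate the learning rate from lr_init*start_factor down to lr_init*end_factor."""
    frac = step / max(total_steps - 1, 1)
    factor = start_factor + (end_factor - start_factor) * frac
    return lr_init * factor

# linear_lr(0, 1000) -> 0.01, linear_lr(999, 1000) -> 0.0001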
+
\ No newline at end of file
diff --git a/en/tutorials/data_augmentation/index.html b/en/tutorials/data_augmentation/index.html
new file mode 100644
index 00000000..08677589
--- /dev/null
+++ b/en/tutorials/data_augmentation/index.html
@@ -0,0 +1,1375 @@

Augmentation - MindYOLO Docs

Data Augmentation

+

Built-in data augmentation methods

| Method name        | Summary                                                              |
|--------------------|----------------------------------------------------------------------|
| mosaic             | randomly applies mosaic4 or mosaic9                                  |
| mosaic4            | splices 4 images into one                                            |
| mosaic9            | splices 9 images into one                                            |
| mixup              | linearly mixes two images                                            |
| pastein            | cut-and-paste augmentation (pastes samples into the image)           |
| random_perspective | random perspective transformation                                    |
| hsv_augment        | random HSV color transformation                                      |
| fliplr             | horizontal flip                                                      |
| flipud             | vertical flip                                                        |
| letterbox          | scale and pad                                                        |
| label_norm         | normalize label coordinates to the 0-1 range                         |
| label_pad          | pad label information into a fixed-size array                        |
| image_norm         | image data normalization                                             |
| image_transpose    | channel transpose (BGR to RGB) and dimension transpose (HWC to CHW)  |
| albumentations     | albumentations data augmentation                                     |
+

These data augmentation functions are defined in mindyolo/data/dataset.py.

+

Instructions

+

MindYOLO data augmentation is configured in the yaml file. For example, to add augmentations to the training process, add a list of dictionaries under the data.train_transforms field of the yaml file; the augmentation methods are applied in order from top to bottom.

+

A typical augmentation configuration dictionary must contain func_name, the name of the augmentation method to apply, followed by the parameters to set for that method. If a parameter is not given in the dictionary, the method's default value is used.

+

A common augmentation configuration looks like:

- {func_name: augmentation_name_1, args11: x11, args12: x12, ..., args1n: x1n}
- {func_name: augmentation_name_2, args21: x21, args22: x22, ..., args2n: x2n}
...
- {func_name: augmentation_name_n, argsn1: xn1, argsn2: xn2, ..., argsnn: xnn}
+

+

Example: the YOLOv7 training data augmentation configuration:

#File directory: configs/yolov7/hyp.scratch.tiny.yaml (https://github.com/mindspore-lab/mindyolo/blob/master/configs/yolov7/hyp.scratch.tiny.yaml)
+  train_transforms:
+    - {func_name: mosaic, prob: 1.0, mosaic9_prob: 0.2, translate: 0.1, scale: 0.5}
+    - {func_name: mixup, prob: 0.05, alpha: 8.0, beta: 8.0, needed_mosaic: True}
+    - {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}
+    - {func_name: pastein, prob: 0.05, num_sample: 30}
+    - {func_name: label_norm, xyxy2xywh_: True}
+    - {func_name: fliplr, prob: 0.5}
+    - {func_name: label_pad, padding_size: 160, padding_value: -1}
+    - {func_name: image_norm, scale: 255.}
+    - {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+
+Note: func_name gives the name of the augmentation method; prob, mosaic9_prob, translate and scale are its parameters. prob is common to all methods and gives the probability of applying the augmentation; its default value is 1.

+

The specific operations performed by the above yaml file are as follows:

+
    +
  • +

  • mosaic: perform the mosaic operation on the input image with a probability of 1.0, i.e. splice 4 different images into one. mosaic9_prob is the probability of using the 9-image splicing variant instead, and translate and scale control the degree of random translation and scaling. (An example image is shown on the rendered documentation page.)
  • mixup: perform the mixup operation with a probability of 0.05, i.e. mix two different images. alpha and beta are the mixing coefficients, and needed_mosaic indicates whether mosaic output is required for the mix.
  • hsv_augment: adjust the HSV color space of the input image with a probability of 1.0 to increase data diversity. hgain, sgain and vgain control how much the H, S and V channels are adjusted.
  • pastein: randomly paste some samples into the input image with a probability of 0.05. num_sample is the number of pasted samples.
  • label_norm: convert the input labels from the (x1, y1, x2, y2) format to the (x, y, w, h) format.
  • fliplr: flip the input image horizontally with a probability of 0.5 to increase data diversity.
  • label_pad: pad the input labels so that every image has the same number of labels; padding_size is the number of labels after padding and padding_value is the fill value.
  • image_norm: scale the input image pixel values from the range [0, 255] to the range [0, 1].
  • image_transpose: convert the input image from BGR to RGB and the image layout from HWC to CHW.
+

Test data enhancement needs to be marked with the test_transforms field, and the configuration method is the same as training.

+

Custom data enhancement

+

Writing Guide:

+
    +
  • Add the custom augmentation method to the COCODataset class in the mindyolo/data/dataset.py file
  • The inputs of an augmentation method usually include the image, the labels, and custom parameters
  • Write the function body and return the customized outputs
+

A typical data enhancement method: +

#Add submethods in mindyolo/data/dataset.py COCODataset
+def data_trans_func(self, image, labels, args1=x1, args2=x2, ..., argsn=xn):
+    # Data enhancement logic
+    ...
+    return image, labels
+
+For example, a custom augmentation function that rotates the image:
#mindyolo/data/dataset.py
+def rotate(self, image, labels, angle):
+    # rotate image
+    image = np.rot90(image, angle // 90)
+    if len(labels):
+        if angle == 90:
+            labels[:, 0], labels[:, 1] = 1 - labels[:, 1], labels[:, 0]
+        elif angle == 180:
+            labels[:, 0], labels[:, 1] = 1 - labels[:, 0], 1 - labels[:, 1]
+        elif angle == 270:
+            labels[:, 0], labels[:, 1] = labels[:, 1], 1 - labels[:, 0]
+    return image, labels
+

+

Usage: define this augmentation method as a dictionary in the model's yaml file, in the same way as described above:

    - {func_name: rotate, angle: 90}
+

+

Result: before/after example images of the rotation augmentation are shown on the rendered documentation page.

\ No newline at end of file
diff --git a/en/tutorials/deployment/index.html b/en/tutorials/deployment/index.html
new file mode 100644
index 00000000..44871eb4
--- /dev/null
+++ b/en/tutorials/deployment/index.html
@@ -0,0 +1,2068 @@

Deployment - MindYOLO Docs

Deployment

+

Dependencies

+
pip install -r requirement.txt
+
+

MindSpore Lite environment preparation

+

Reference: Lite environment configuration
+ Note: The python environment that MindSpore Lite is adapted to is 3.7. Please prepare the python3.7 environment before installing Lite

+
    +
  1. Depending on the environment, download the matching tar.gz package and whl package.
  2. Unzip the tar.gz package and install the corresponding version of the whl package:
     tar -zxvf mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.tar.gz
     pip install mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.whl
  3. Configure the Lite environment variables. LITE_HOME is the folder extracted from the tar.gz package; an absolute path is recommended:
     export LITE_HOME=/path/to/mindspore-lite-{version}-{os}-{platform}
     export LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH
     export PATH=$LITE_HOME/tools/converter/converter:$LITE_HOME/tools/benchmark:$PATH
+

Quick Start

+

Model conversion

+

Convert ckpt model to mindir model, this step can be run on CPU/Ascend910 +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target [CPU/Ascend]
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+# Run on Ascend
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target Ascend
+

+

Lite Test

+
python deploy/test.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml
+e.g.
+python deploy/test.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml
+
+

Lite Predict

+
python ./deploy/predict.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_conifg/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+
+

Script description

+
    +
  • predict.py supports single image inference
  • +
  • test.py supports COCO data set inference
  • +
  • Note: currently only supports inference on Ascend 310
  • +
+

MindX Deployment

+

Environment configuration

+

Reference: MindX environment preparation
+Note: MindX currently supports python version 3.9. Please prepare the python3.9 environment before installing MindX

+
    +
  1. Obtain the Environment Installation Package (https://www.hiascend.com/software/mindx-sdk/commercial) from the MindX official website. Currently, version 3.0.0 of MindX infer is supported.
  2. Go to the Download page and download Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run.
  3. Place the installation package in a directory on the Ascend 310 machine and unzip it.
  4. If you are not a root user, add executable permissions to the package:
     chmod +x Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run
  5. Enter the upload path of the development kit package and install the mxManufacture development kit package:
     ./Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run --install
     If the following echo appears after installation, the software was installed successfully:
     The installation is successful
     After installation, the mxManufacture software directory structure is as follows:
     .
     ├── bin
     ├── config
     ├── filelist.txt
     ├── include
     ├── lib
     ├── opensource
     ├── operators
     ├── python
     ├── samples
     ├── set_env.sh
     ├── toolkit
     └── version.info
  6. Enter the installation directory of mxManufacture and run the following command to make the MindX SDK environment variables take effect:
     source set_env.sh
  7. Enter ./mxVision-3.0.0/python/ and install mindx-3.0.0-py3-none-any.whl:
     pip install mindx-3.0.0-py3-none-any.whl
+

Model conversion

+
    +
  1. Convert the ckpt model to an air model. This step needs to be performed on Ascend 910:
     python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format AIR
     e.g.
     python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format AIR
     Note: yolov7 needs to run export on an Ascend 910 machine with version 2.0 or above.
  2. Convert the air model to an om model with the atc conversion tool. This step requires the MindX environment and runs on Ascend 310:
     atc --model=./path_to_air/weight.air --framework=1 --output=yolo --soc_version=Ascend310
+

MindX Test

+

Infer COCO data: +

python ./deploy/test.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml
+e.g.
+python ./deploy/test.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml
+

+

MindX Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+

+

MindIR Deployment

+

Environmental requirements

+

mindspore>=2.1

+

Precautions

+
    +
  1. Currently only Predict is supported.
  2. In theory this can also run on Ascend 910, but it has not been tested.
+

Model conversion

+

Convert the ckpt model to the mindir model, this step can be run on the CPU +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+

+

MindIR Test

+

Coming soon

+

MindIR Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type MindIR --model_path ./path_to_mindir/weight.mindir --config ./path_to_conifg/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type MindIR --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+

+

ONNX deployment

+

Environment configuration

+
pip install onnx>=1.9.0
+pip install onnxruntime>=1.8.0
+
+

Precautions

+
    +
  1. Not all MindYOLO models currently support ONNX export and inference (only YOLOv3 is used as an example here).
  2. Currently only the Predict function is supported.
  3. Exporting to ONNX requires replacing the nn.SiLU operator with an implementation based on the underlying sigmoid operator.
+

For example: add the following custom layer and replace all nn.SiLU in mindyolo +

class EdgeSiLU(nn.Cell):
+    """
+    SiLU activation function: x * sigmoid(x). To support for onnx export with nn.SiLU.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def construct(self, x):
+        return x * ops.sigmoid(x)
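
One simple way to apply this replacement before export, assuming the model code instantiates nn.SiLU directly (a sketch, not the only option), is to patch the symbol before the network is built:

import mindspore.nn as nn

# Hypothetical: every nn.SiLU() created after this line uses the ONNX-friendly variant.
# Run this before create_model() builds the network; it does not affect modules that
# imported SiLU by name beforehand.
nn.SiLU = EdgeSiLU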
+

+

Model conversion

+

Convert the ckpt model to an ONNX model. This step and the Test step can only be run on the CPU. +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format ONNX --device_target [CPU]
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov3/yolov3.yaml --weight yolov3-darknet53_300e_mAP455-adfb27af.ckpt --per_batch_size 1 --file_format ONNX --device_target CPU
+

+

ONNX Test

+

Coming soon

+

ONNXRuntime Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type ONNX --model_path ./path_to_onnx_model/model.onnx --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type ONNX --model_path ./yolov3.onnx --config ./configs/yolov3/yolov3.yaml --image_path ./coco/image/val2017/image.jpg
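
Under the hood, the ONNX path boils down to a standard onnxruntime session. A minimal sketch is shown below; the file names are placeholders, and the real predict.py also letterboxes the input and applies NMS to the raw outputs:

import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("yolov3.onnx", providers=["CPUExecutionProvider"])
input_name = session.get_inputs()[0].name

img = cv2.cvtColor(cv2.imread("image.jpg"), cv2.COLOR_BGR2RGB)   # BGR -> RGB
img = cv2.resize(img, (640, 640)).astype(np.float32) / 255.0      # naive resize, no letterbox
img = np.ascontiguousarray(img.transpose(2, 0, 1)[None])          # HWC -> NCHW with batch dim

outputs = session.run(None, {input_name: img})                    # raw predictions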
+

+

Standard and supported model libraries

| Name   | Scale              | Context  | ImageSize | Dataset      | Box mAP (%) | Params | FLOPs  | Recipe | Download     |
|--------|--------------------|----------|-----------|--------------|-------------|--------|--------|--------|--------------|
| YOLOv8 | N                  | D310x1-G | 640       | MS COCO 2017 | 37.2        | 3.2M   | 8.7G   | yaml   | ckpt, mindir |
| YOLOv8 | S                  | D310x1-G | 640       | MS COCO 2017 | 44.6        | 11.2M  | 28.6G  | yaml   | ckpt, mindir |
| YOLOv8 | M                  | D310x1-G | 640       | MS COCO 2017 | 50.5        | 25.9M  | 78.9G  | yaml   | ckpt, mindir |
| YOLOv8 | L                  | D310x1-G | 640       | MS COCO 2017 | 52.8        | 43.7M  | 165.2G | yaml   | ckpt, mindir |
| YOLOv8 | X                  | D310x1-G | 640       | MS COCO 2017 | 53.7        | 68.2M  | 257.8G | yaml   | ckpt, mindir |
| YOLOv7 | Tiny               | D310x1-G | 640       | MS COCO 2017 | 37.5        | 6.2M   | 13.8G  | yaml   | ckpt, mindir |
| YOLOv7 | L                  | D310x1-G | 640       | MS COCO 2017 | 50.8        | 36.9M  | 104.7G | yaml   | ckpt, mindir |
| YOLOv7 | X                  | D310x1-G | 640       | MS COCO 2017 | 52.4        | 71.3M  | 189.9G | yaml   | ckpt, mindir |
| YOLOv5 | N                  | D310x1-G | 640       | MS COCO 2017 | 27.3        | 1.9M   | 4.5G   | yaml   | ckpt, mindir |
| YOLOv5 | S                  | D310x1-G | 640       | MS COCO 2017 | 37.6        | 7.2M   | 16.5G  | yaml   | ckpt, mindir |
| YOLOv5 | M                  | D310x1-G | 640       | MS COCO 2017 | 44.9        | 21.2M  | 49.0G  | yaml   | ckpt, mindir |
| YOLOv5 | L                  | D310x1-G | 640       | MS COCO 2017 | 48.5        | 46.5M  | 109.1G | yaml   | ckpt, mindir |
| YOLOv5 | X                  | D310x1-G | 640       | MS COCO 2017 | 50.5        | 86.7M  | 205.7G | yaml   | ckpt, mindir |
| YOLOv4 | CSPDarknet53       | D310x1-G | 608       | MS COCO 2017 | 45.4        | 27.6M  | 52G    | yaml   | ckpt, mindir |
| YOLOv4 | CSPDarknet53(silu) | D310x1-G | 640       | MS COCO 2017 | 45.8        | 27.6M  | 52G    | yaml   | ckpt, mindir |
| YOLOv3 | Darknet53          | D310x1-G | 640       | MS COCO 2017 | 45.5        | 61.9M  | 156.4G | yaml   | ckpt, mindir |
| YOLOX  | N                  | D310x1-G | 416       | MS COCO 2017 | 24.1        | 0.9M   | 1.1G   | yaml   | ckpt, mindir |
| YOLOX  | Tiny               | D310x1-G | 416       | MS COCO 2017 | 33.3        | 5.1M   | 6.5G   | yaml   | ckpt, mindir |
| YOLOX  | S                  | D310x1-G | 640       | MS COCO 2017 | 40.7        | 9.0M   | 26.8G  | yaml   | ckpt, mindir |
| YOLOX  | M                  | D310x1-G | 640       | MS COCO 2017 | 46.7        | 25.3M  | 73.8G  | yaml   | ckpt, mindir |
| YOLOX  | L                  | D310x1-G | 640       | MS COCO 2017 | 49.2        | 54.2M  | 155.6G | yaml   | ckpt, mindir |
| YOLOX  | X                  | D310x1-G | 640       | MS COCO 2017 | 51.6        | 99.1M  | 281.9G | yaml   | ckpt, mindir |
| YOLOX  | Darknet53          | D310x1-G | 640       | MS COCO 2017 | 47.7        | 63.7M  | 185.3G | yaml   | ckpt, mindir |
+


\ No newline at end of file
diff --git a/en/tutorials/finetune/index.html b/en/tutorials/finetune/index.html
new file mode 100644
index 00000000..68a79d4f
--- /dev/null
+++ b/en/tutorials/finetune/index.html
@@ -0,0 +1,1381 @@

Finetune - MindYOLO Docs

Fine-tuning

+

Custom Dataset Finetune Process

+

This article takes the Safety Hat Wearing Detection Dataset (SHWD) as an example to introduce the main process of finetune on MindYOLO with a custom data set.

+

Dataset Conversion

+

The SHWD dataset uses VOC-format labels, with the following directory structure:

             Root directory
                ├── Annotations
                │   ├── 000000.xml
                │   └── 000002.xml
                ├── ImageSets
                │   └── Main
                │       ├── test.txt
                │       ├── train.txt
                │       ├── trainval.txt
                │       └── val.txt
                └── JPEGImages
                        ├── 000000.jpg
                        └── 000002.jpg
+
+The xml file under the Annotations folder contains annotation information for each picture. The main contents are as follows: +
<annotation>
+  <folder>JPEGImages</folder>
+  <filename>000377.jpg</filename>
+  <path>F:\baidu\VOC2028\JPEGImages\000377.jpg</path>
+  <source>
+    <database>Unknown</database>
+  </source>
+  <size>
+    <width>750</width>
+    <height>558</height>
+    <depth>3</depth>
+  </size>
+  <segmented>0</segmented>
+  <object>
+    <name>hat</name>
+    <pose>Unspecified</pose>
+    <truncated>0</truncated>
+    <difficult>0</difficult>
+    <bndbox>
+      <xmin>142</xmin>
+      <ymin>388</ymin>
+      <xmax>177</xmax>
+      <ymax>426</ymax>
+    </bndbox>
+  </object>
+
+An annotation can contain multiple objects. The name field of each object is the category name, and xmin, ymin, xmax and ymax are the coordinates of the top-left and bottom-right corners of the bounding box.

+

The data set format supported by MindYOLO is YOLO format. For details, please refer to Data Preparation

+

Since MindYOLO uses the image file name as image_id during evaluation, image names must be numeric rather than arbitrary strings, so the images need to be renamed. Converting the SHWD dataset therefore involves the following steps (a simplified sketch of the xml parsing step is shown after the command below):
  • Copy each image to the corresponding path and rename it
  • Write the relative path of each image into the corresponding txt file in the root directory
  • Parse each xml file and generate the corresponding txt annotation file under the matching path
  • For the validation set, additionally generate the final json annotation file

+

For detailed implementation, please refer to convert_shwd2yolo.py. The operation method is as follows:

+

python examples/finetune_SHWD/convert_shwd2yolo.py --root_dir /path_to_shwd/SHWD
+
+Running the above command will generate a SHWD data set in yolo format in the same directory without changing the original data set.
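
For reference, the core of the xml parsing step looks roughly like the sketch below; this is a simplification, not the full convert_shwd2yolo.py, and the class list follows the SHWD yaml ('person', 'hat').

import xml.etree.ElementTree as ET

CLASS_NAMES = ["person", "hat"]

def voc_xml_to_yolo_lines(xml_path):
    """Turn one VOC annotation into YOLO txt lines: 'cls cx cy w h', all normalized to 0-1."""
    root = ET.parse(xml_path).getroot()
    img_w = float(root.find("size/width").text)
    img_h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        cls_id = CLASS_NAMES.index(obj.find("name").text)
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        cx, cy = (xmin + xmax) / 2 / img_w, (ymin + ymax) / 2 / img_h
        bw, bh = (xmax - xmin) / img_w, (ymax - ymin) / img_h
        lines.append(f"{cls_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}")
    return lines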

+

Write yaml configuration file

+

The configuration file mainly contains parameters related to the dataset, data augmentation, loss, optimizer and model structure. Since MindYOLO provides a yaml inheritance mechanism, you only need to write the parameters that require adjustment into yolov7-tiny_shwd.yaml and inherit the native yaml files provided by MindYOLO. Its content is as follows:

__BASE__: [
+  '../../configs/yolov7/yolov7-tiny.yaml',
+]
+
+per_batch_size: 16 # Single card batchsize, total batchsize=per_batch_size * device_num
+img_size: 640 # image sizes
+weight: ./yolov7-tiny_pretrain.ckpt
+strict_load: False # Whether to strictly load the internal parameters of ckpt. The default is True. If set to False, when the number of classifications is inconsistent, the weight of the last layer of classifiers will be discarded.
+log_interval: 10 #Print the loss result every log_interval iterations
+
+data:
+  dataset_name: shwd
+  train_set: ./SHWD/train.txt # Actual training data path
+  val_set: ./SHWD/val.txt
+  test_set: ./SHWD/val.txt
+  nc: 2 # Number of categories
+  # class names
+  names: [ 'person', 'hat' ] # The name of each category
+
+optimizer:
+  lr_init: 0.001 # initial learning rate
+
  • __BASE__ is a list giving the paths of the inherited yaml files; multiple yaml files can be inherited
  • per_batch_size and img_size are the batch size per card and the image size used for data processing
  • weight is the file path of the pre-trained model mentioned above, and strict_load means discarding parameters whose shapes do not match
  • log_interval is the log printing interval
  • all parameters under the data field are dataset related: dataset_name is the name of the custom dataset; train_set, val_set and test_set are the txt files holding the training, validation and test image paths; nc is the number of categories; names are the category names
  • lr_init under the optimizer field is the initial learning rate after warmup, 10 times smaller than the default

+

For parameter inheritance relationship and parameter description, please refer to Configuration.

+

Download pre-trained model

+

You can choose a model from the MindYOLO model zoo as the pre-trained model for the custom dataset. These pre-trained models already reach good accuracy on the COCO dataset. Compared with training from scratch, loading a pre-trained model generally converges faster and reaches higher final accuracy, and it largely avoids problems such as vanishing or exploding gradients caused by poor initialization.

+

The number of categories in a custom dataset usually differs from COCO, and the detection head of each MindYOLO model depends on the number of categories, so directly importing the pre-trained model may fail because of shape mismatches. Set the strict_load parameter to False in the yaml configuration file: MindYOLO will then automatically discard the parameters with mismatched shapes and emit a warning that those module parameters were not loaded. A sketch of this behavior is shown below.
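
Conceptually, strict_load: False behaves like the following sketch: checkpoint entries whose shapes no longer match the network (typically the classification head) are dropped before loading. This is a simplified illustration, not MindYOLO's actual loading code.

from mindspore import load_checkpoint, load_param_into_net

def load_pretrained_non_strict(network, ckpt_path):
    """Load a checkpoint, skipping parameters whose shape differs from the network's."""
    param_dict = load_checkpoint(ckpt_path)
    net_params = {p.name: p for p in network.get_parameters()}
    filtered = {
        name: value
        for name, value in param_dict.items()
        if name in net_params and value.shape == net_params[name].shape
    }
    load_param_into_net(network, filtered)
    return network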

+

Model fine-tuning (Finetune)

+

During fine-tuning you can first train with the default configuration; if the results are not good, consider adjusting the following parameters:
  • lower the learning rate if the loss is hard to converge
  • adjust per_batch_size according to the actual device memory usage; generally, the larger per_batch_size is, the more accurate the gradient estimate
  • adjust epochs according to whether the loss has converged
  • adjust the anchors according to the actual object sizes

+

Since the SHWD training set only has about 6,000 images, the yolov7-tiny model was selected for training.
  • Distributed training on multi-card NPU/GPU, taking 8 cards as an example:

+
mpirun --allow-run-as-root -n 8 python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --is_parallel True
+
+
    +
  • Train the model on a single card NPU/GPU/CPU:
  • +
+

python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml
+
+Note: training on the SHWD dataset with the default yolov7-tiny parameters reaches an accuracy of about AP50 = 87.0; changing lr_init from 0.01 to 0.001 improves this to AP50 = 89.2.

+

Visualized Inference

+

Use demo/predict.py to run inference with the trained model and visualize the results:

+

python demo/predict.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg
+
+The inference result looks as follows:

+
(Example detection result image on the rendered documentation page.)

\ No newline at end of file
diff --git a/en/tutorials/modelarts/index.html b/en/tutorials/modelarts/index.html
new file mode 100644
index 00000000..8b88d4ed
--- /dev/null
+++ b/en/tutorials/modelarts/index.html
@@ -0,0 +1,1288 @@

CloudBrain - MindYOLO Docs

MindYOLO ModelArts Training Quick Start

+

This article introduces how to train MindYOLO on the ModelArts platform. For ModelArts tutorials, refer to the Help Center.

+

Prepare data and code

+

Use the OBS service to upload datasets. For the related operations, see the OBS User Guide; to obtain the AK of this account (https://docs.xckpjs.com/zh-cn/browsertg/obs/obs_03_1007.html) and the server address, consult the platform administrator or the person in charge of the account. If the AK is not in the location described in the user guide, likewise consult the platform administrator or person in charge.
Steps:

+
    +
  1. Log in to OBS Browser+. (screenshot: obs)
  2. Create a bucket, then create a new folder inside it (e.g. coco). (screenshot: Bucket)
  3. Upload the data files, placing them in that separate folder (coco in this example). The code copies the data from the OBS bucket, and what gets copied is every file under this folder; without creating a new folder you cannot select the complete dataset.
+

Dataset

+

Prepare code

+

Also use the OBS service to upload the training code.
+Operation: create a bucket -> create a new folder (e.g. mindyolo) -> upload the code files; at the same level as mindyolo, create an output folder to store training records and a log folder to store logs. (screenshots: bucket directory, kit code)

+

Create new algorithm

+
    +
  1. In the Algorithm Management tab, select Create. (screenshot: Create algorithm)
  2. Set a custom algorithm name. For the prebuilt framework select Ascend-Powered-Engine; use the MindSpore-2.0 image for the master branch and the MindSpore-1.8.1 image for the r0.1 branch. Then set the code directory, startup file, inputs, outputs and hyperparameters. (screenshot: Algorithm configuration)
+
    +
  • If you need to load pre-trained weights, select the uploaded model file under model selection and add the ckpt_dir parameter to the running hyperparameters. (screenshot: ckpt)
  • The startup file is train.py
  • The running hyperparameter enable_modelarts must be added, with the value True
  • The running hyperparameter config refers to the directory shown in the training job's runtime environment preview, such as /home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml
  • For distributed training scenarios, also add the hyperparameter is_parallel, set to True for distributed runs and False for single-card runs
+

Create new job

+
    +
  1. In the ModelArts service select: Training Management -> Training Jobs -> Create Training Job. Set the job name and choose not to include it in an experiment; for Create Method choose My Algorithm and select the newly created algorithm. (screenshots: task, task1)
  2. For Training Input -> Data storage location, select the OBS data bucket created above (coco in the example); for training output select the output folder created when preparing the code, and set the config hyperparameter value according to the runtime environment preview. (screenshot: task2)
  3. Select the resource pool, specifications and number of compute nodes, and select the log folder created earlier as the job log path. (screenshots: task3, Specifications)
  4. Submit the training job; it starts running after queuing.
+

Modify job

+

Select Rebuild on the training job page to modify the selected job configuration.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/quick_start/index.html b/en/tutorials/quick_start/index.html new file mode 100644 index 00000000..13987aab --- /dev/null +++ b/en/tutorials/quick_start/index.html @@ -0,0 +1,1323 @@ + + + + + + + + + + + + + + + + + + + + + + + + Quick Start - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Quick Start

+

Getting Started with MindYOLO

+

This document provides a brief introduction to the usage of built-in command-line tools in MindYOLO.

+

Inference Demo with Pre-trained Models

+
    +
  1. Pick a model and its config file from the Model Zoo, such as ./configs/yolov7/yolov7.yaml.
  2. Download the corresponding pre-trained checkpoint from the Model Zoo page of each model.
  3. To run YOLO object detection with the built-in configs, please run:
+
# Run with Ascend (By default)
+python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg
+
+# Run with GPU
+python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=GPU
+
+

For details of the command line arguments, see demo/predict.py -h or look at its source code to understand their behavior. Some common options: +* To run on CPU, set device_target to CPU (see the example below). +* The results will be saved in ./detect_results.

+
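For example, to run the same demo on CPU (a sketch reusing the placeholder paths above):
+# Run with CPU
+python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=CPU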

Training & Evaluation in Command Line

+
    +
  • Prepare your dataset in YOLO format. If training with COCO (in YOLO format), please prepare the dataset from yolov5 or darknet.
  • +
+
+ +
  coco/
+    {train,val}2017.txt
+    annotations/
+      instances_{train,val}2017.json
+    images/
+      {train,val}2017/
+          00000001.jpg
+          ...
+          # image files that are mentioned in the corresponding train/val2017.txt
+    labels/
+      {train,val}2017/
+          00000001.txt
+          ...
+          # label files that are mentioned in the corresponding train/val2017.txt
+
+
+ +
    +
  • +

    To train a model on 8 NPUs/GPUs: +

    mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True
    +

    +
  • +
  • +

    To train a model on 1 NPU/GPU/CPU: +

    python train.py --config ./configs/yolov7/yolov7.yaml 
    +

    +
  • +
  • +

    To evaluate a model's performance on 1 NPU/GPU/CPU: +

    python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt
    +

    +
  • +
  • To evaluate a model's performance on 8 NPUs/GPUs: +
    mpirun --allow-run-as-root -n 8 python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt --is_parallel True
    +
    +Notes:
  • +
+

(1) The default hyper-parameters are tuned for 8-card training; some parameters need to be adjusted when training on a single card.

+

(2) The default device is Ascend. You can change it by setting 'device_target' to Ascend, GPU or CPU, which are the currently supported targets.

+

(3) For more options, see train/test.py -h.

+

(4) To train on CloudBrain, see here

+

Deployment

+

See here.

+

To use MindYOLO APIs in Your Code

+

Coming soon.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/how_to_guides/callback/index.html b/how_to_guides/callback/index.html new file mode 100644 index 00000000..18887b8c --- /dev/null +++ b/how_to_guides/callback/index.html @@ -0,0 +1,1173 @@ + + + + + + + + + + + + + + + + + + + + + + + + Callback - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Usage of MindYOLO callback function

+

Callback function: when the program reaches a certain mount point (hook), all methods registered to that mount point at runtime are called automatically. +Callbacks increase the flexibility and extensibility of a program, because users can register custom methods at a mount point without modifying the program's code.

+

In MindYOLO, the callback function is specifically implemented in the mindyolo/utils/callback.py file. +

#mindyolo/utils/callback.py
+@CALLBACK_REGISTRY.registry_module()
+class callback_class_name(BaseCallback):
+    def __init__(self, **kwargs):
+        super().__init__()
+    ...
+    def callback_fn_name(self, run_context: RunContext):
+        pass
+

+

Add a list of dictionaries under the callback field of the model's yaml file to register the callbacks: +

#Callback function configuration dictionary:
+callback:
+- { name: callback_class_name, args: xx }
+- { name: callback_class_name2, args: xx }
+
+Take YOLOX as an example:

+

Add logic to the on_train_step_begin method of the YoloxSwitchTrain class in the mindyolo/utils/callback.py file to print a "train step begin" log: +

@CALLBACK_REGISTRY.registry_module()
+class YoloxSwitchTrain(BaseCallback):
+
+    def on_train_step_begin(self, run_context: RunContext):
+        # Custom logic
+        logger.info("train step begin")
+        pass
+
+Add the callback under the callback field of the corresponding YOLOX yaml file, configs/yolox/hyp.scratch.yaml: +
callback:
+  - { name: YoloxSwitchTrain, switch_epoch_num: 285 }
+
+Then the logger.info("train step begin") statement will be executed before each training step.

+

With the help of the callback function, users can customize the logic that needs to be executed at a certain mount point without having to understand the code of the complete training process.
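As a further illustration, here is a minimal sketch of a user-defined callback. The class name and message are hypothetical; only the registry decorator, BaseCallback, RunContext and the on_train_step_begin hook come from the code above:
+# e.g. added to mindyolo/utils/callback.py
+@CALLBACK_REGISTRY.registry_module()
+class PrintStepCallback(BaseCallback):
+    def __init__(self, msg="custom hook reached", **kwargs):
+        super().__init__()
+        self.msg = msg
+
+    def on_train_step_begin(self, run_context: RunContext):
+        # executed automatically before every training step
+        logger.info(self.msg)  # logger is the mindyolo logger used above
+
+# and in the model's yaml file:
+# callback:
+#   - { name: PrintStepCallback, msg: "custom hook reached" }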

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/how_to_guides/data_preparation/index.html b/how_to_guides/data_preparation/index.html new file mode 100644 index 00000000..0b493abf --- /dev/null +++ b/how_to_guides/data_preparation/index.html @@ -0,0 +1,1230 @@ + + + + + + + + + + + + + + + + + + + + + + + + Data Preparation - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Data preparation

+

Dataset format introduction

+

Download the coco2017 YOLO-format labels coco2017labels-segments and the coco2017 original images train2017, val2017, then put the original images into the images directory of the YOLO-format dataset: +

└─ coco2017_yolo
+    ├─ annotations
+        └─ instances_val2017.json
+    ├─ images
+        ├─ train2017   # coco2017 original images
+        └─ val2017     # coco2017 original images
+    ├─ labels
+        ├─ train2017
+        └─ val2017
+    ├─ train2017.txt
+    ├─ val2017.txt
+    └─ test-dev2017.txt
+
+Each line of the train2017.txt / val2017.txt file is the relative path of a single image, for example: +
./images/train2017/00000000.jpg
+./images/train2017/00000001.jpg
+./images/train2017/00000002.jpg
+./images/train2017/00000003.jpg
+./images/train2017/00000004.jpg
+./images/train2017/00000005.jpg
+
+The txt files under labels/train2017 contain the annotation information of the corresponding images; both detect and segment formats are supported.

+

Detect format: each row usually has 5 columns, corresponding to the category id followed by the normalized center coordinates (x, y) and the normalized width and height (w, h) of the bounding box +

62 0.417040 0.206280 0.403600 0.412560
+62 0.818810 0.197933 0.174740 0.189680
+39 0.684540 0.277773 0.086240 0.358960
+0 0.620220 0.725853 0.751680 0.525840
+63 0.197190 0.364053 0.394380 0.669653
+39 0.932330 0.226240 0.034820 0.076640
+
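For illustration, a minimal sketch (not part of MindYOLO) of converting one detect-format line back to pixel coordinates, assuming the image width and height are known:
+def yolo_to_xyxy(line, img_w, img_h):
+    """Convert 'cls cx cy w h' (normalized) to (cls, x1, y1, x2, y2) in pixels."""
+    cls, cx, cy, w, h = line.split()
+    cx, cy, w, h = float(cx) * img_w, float(cy) * img_h, float(w) * img_w, float(h) * img_h
+    return int(cls), cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2
+
+print(yolo_to_xyxy("62 0.417040 0.206280 0.403600 0.412560", 640, 480))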
+Segment format: the first value in each line is the category id, followed by pairs of normalized coordinate points (x, y)

+

45 0.782016 0.986521 0.937078 0.874167 0.957297 0.782021 0.950562 0.739333 0.825844 0.561792 0.714609 0.420229 0.657297 0.391021 0.608422 0.4 0.0303438 0.750562 0.0016875 0.811229 0.003375 0.889896 0.0320156 0.986521
+45 0.557859 0.143813 0.487078 0.0314583 0.859547 0.00897917 0.985953 0.130333 0.984266 0.184271 0.930344 0.386521 0.80225 0.480896 0.763484 0.485396 0.684266 0.39775 0.670781 0.3955 0.679219 0.310104 0.642141 0.253937 0.561234 0.155063 0.559547 0.137083
+50 0.39 0.727063 0.418234 0.649417 0.455297 0.614125 0.476469 0.614125 0.51 0.590583 0.54 0.569417 0.575297 0.562354 0.601766 0.56 0.607062 0.536479 0.614125 0.522354 0.637063 0.501167 0.665297 0.48 0.69 0.477646 0.698828 0.494125 0.698828 0.534125 0.712938 0.529417 0.742938 0.548229 0.760594 0.564708 0.774703 0.550583 0.778234 0.536479 0.781766 0.531771 0.792359 0.541167 0.802937 0.555292 0.802937 0.569417 0.802937 0.576479 0.822359 0.576479 0.822359 0.597646 0.811766 0.607062 0.811766 0.618833 0.818828 0.637646 0.820594 0.656479 0.827641 0.687063 0.827641 0.703521 0.829406 0.727063 0.838234 0.708229 0.852359 0.729417 0.868234 0.750583 0.871766 0.792938 0.877063 0.821167 0.884125 0.861167 0.817062 0.92 0.734125 0.976479 0.711172 0.988229 0.48 0.988229 0.494125 0.967063 0.517062 0.912937 0.508234 0.832937 0.485297 0.788229 0.471172 0.774125 0.395297 0.729417
+45 0.375219 0.0678333 0.375219 0.0590833 0.386828 0.0503542 0.424156 0.0315208 0.440797 0.0281458 0.464 0.0389167 0.525531 0.115583 0.611797 0.222521 0.676359 0.306583 0.678875 0.317354 0.677359 0.385271 0.66475 0.394687 0.588594 0.407458 0.417094 0.517771 0.280906 0.604521 0.0806562 0.722208 0.0256719 0.763917 0.00296875 0.809646 0 0.786104 0 0.745083 0 0.612583 0.03525 0.613271 0.0877187 0.626708 0.130594 0.626708 0.170437 0.6025 0.273844 0.548708 0.338906 0.507 0.509906 0.4115 0.604734 0.359042 0.596156 0.338188 0.595141 0.306583 0.595141 0.291792 0.579516 0.213104 0.516969 0.129042 0.498297 0.100792 0.466516 0.0987708 0.448875 0.0786042 0.405484 0.0705208 0.375219 0.0678333 0.28675 0.108375 0.282719 0.123167 0.267078 0.162854 0.266062 0.189083 0.245391 0.199833 0.203516 0.251625 0.187375 0.269771 0.159641 0.240188 0.101125 0.249604 0 0.287271 0 0.250271 0 0.245563 0.0975938 0.202521 0.203516 0.145354 0.251953 0.123167 0.28675 0.108375
+49 0.587812 0.128229 0.612281 0.0965625 0.663391 0.0840833 0.690031 0.0908125 0.700109 0.10425 0.705859 0.133042 0.700109 0.143604 0.686422 0.146479 0.664828 0.153188 0.644672 0.157042 0.629563 0.175271 0.605797 0.181021 0.595 0.147437
+49 0.7405 0.178417 0.733719 0.173896 0.727781 0.162583 0.729484 0.150167 0.738812 0.124146 0.747281 0.0981458 0.776109 0.0811875 0.804094 0.0845833 0.814266 0.102667 0.818516 0.115104 0.812578 0.133208 0.782906 0.151292 0.754063 0.172771
+49 0.602656 0.178854 0.636125 0.167875 0.655172 0.165125 0.6665 0.162375 0.680391 0.155521 0.691719 0.153458 0.703047 0.154146 0.713859 0.162375 0.724156 0.174729 0.730844 0.193271 0.733422 0.217979 0.733938 0.244063 0.733422 0.281813 0.732391 0.295542 0.728266 0.300354 0.702016 0.294854 0.682969 0.28525 0.672156 0.270146
+49 0.716891 0.0519583 0.683766 0.0103958 0.611688 0.0051875 0.568828 0.116875 0.590266 0.15325 0.590266 0.116875 0.613641 0.0857083 0.631172 0.0857083 0.6565 0.083125 0.679875 0.0883125 0.691563 0.0961042 0.711031 0.0649375
+
+instances_val2017.json is the validation-set annotation in COCO format, so the COCO API can be called directly for mAP calculation.

+

During training & inference, you need to modify train_set, val_set and test_set in configs/coco.yaml to the actual data paths; a sketch is shown below.

+
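A sketch of what these entries in configs/coco.yaml might look like after pointing them at the layout above (the paths are illustrative):
+train_set: ./coco2017_yolo/train2017.txt
+val_set: ./coco2017_yolo/val2017.txt
+test_set: ./coco2017_yolo/test-dev2017.txt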

For a practical example of fine-tuning MindYOLO on a custom dataset, please refer to Finetune.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/how_to_guides/write_a_new_model/index.html b/how_to_guides/write_a_new_model/index.html new file mode 100644 index 00000000..39138953 --- /dev/null +++ b/how_to_guides/write_a_new_model/index.html @@ -0,0 +1,1552 @@ + + + + + + + + + + + + + + + + + + + + + + + + Write A New Model - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Model Writing Guide

+

This document provides a tutorial for writing custom models for MindYOLO.
+It is divided into three parts:

+
    +
  • Model definition: We can define a network directly or use a yaml file to define a network.
  • +
  • Register model: optional. After registration, the custom model can be created by name through the create_model interface.
  • +
  • Verification: Verify whether the model is operational
  • +
+

Model definition

+

1. Use python code directly to write the network

+

Module import

+

Import the nn module and ops module in the MindSpore framework to define the components and operations of the neural network. +

import mindspore.nn as nn
+import mindspore.ops.operations as ops
+

+

Create a model

+

Define a model class MyModel that inherits from nn.Cell. In the constructor __init__, define the various components of the model:

+
class MyModel(nn.Cell):
+    def __init__(self):
+        super(MyModel, self).__init__()
+        # conv1 is a 2D convolution layer: 3 input channels, 16 output channels, 3x3 kernel, stride 1, padding 1
+        # (pad_mode="pad" is needed in MindSpore when an explicit padding of 1 is used).
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, pad_mode="pad", padding=1)
+        # relu is a ReLU activation operation.
+        self.relu = ops.ReLU()
+        # maxpool is a 2D max-pooling layer with a 2x2 window and stride 2.
+        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
+        # conv2 is another 2D convolution layer: 16 input channels, 32 output channels, 3x3 kernel, stride 1, padding 1.
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, pad_mode="pad", padding=1)
+        # fc is a fully connected layer with input dimension 32 * 8 * 8 and output dimension 10.
+        self.fc = nn.Dense(32 * 8 * 8, 10)
+
+    # The construct method defines the forward pass: the input x goes through convolution, activation and pooling,
+    # is flattened into a one-dimensional vector, and finally passes through the fully connected layer to produce the output.
+    def construct(self, x): 
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = x.view(x.shape[0], -1)
+        x = self.fc(x)
+        return x
+
+

Create a model instance

+

Instantiate the MyModel class to create a model instance, which can then be used for training and inference. +

model = MyModel()
+
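A quick sanity check of this network (a sketch; a 32x32 input is used so that two 2x2 poolings produce the 8x8 feature map expected by the fully connected layer):
+import numpy as np
+import mindspore as ms
+
+x = ms.Tensor(np.random.randn(1, 3, 32, 32), ms.float32)
+out = model(x)
+print(out.shape)  # expected: (1, 10)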

+

2. Use yaml file to write network

+

This usually requires the following three steps:

+
    +
  • Create a new mymodel.yaml file
  • +
  • Create a corresponding mymodel.py file
  • +
  • Import the model in the mindyolo/models/__init__.py file
  • +
+

Here is a detailed guide to writing mymodel.yaml file:
+Take writing a simple network as an example: +Write the necessary parameters in yaml format, and then use these parameters in the mymodel.py file. +The network part is the model network
+[[from, number, module, args], ...]: Each element represents the configuration of a network layer.
+

# The yaml in __BASE__ indicates the base configuration files for inheritance. Repeated parameters will be overwritten by the current file;
+__BASE__:
+- '../coco.yaml'
+- './hyp.scratch-high.yaml'
+
+per_batch_size: 32
+img_size: 640
+sync_bn: False
+
+network:
+  model_name: mymodel
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [ 8, 16, 32 ]
+
+  # Configuration of the backbone network. The meaning of each layer is
+  # [from, number, module, args]
+  # Take the first layer as an example, [-1, 1, ConvNormAct, [32, 3, 1]], which means the input comes from `-1` (the previous layer), the number of repetitions is 1, the module name is ConvNormAct, and the module input parameters are [32, 3, 1];
+  backbone:
+    [[-1, 1, ConvNormAct, [32, 3, 1]], # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]], # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]], # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]], # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+    ]
+
+  # head part configuration
+  head:
+    [
+     [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ], # 7-P4/16
+     [ -1, 8, Bottleneck, [ 512 ] ],
+     [ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ], # 9-P5/32
+     [ -1, 4, Bottleneck, [ 1024 ] ], # 10
+    ]
+

+

Write mymodel.py file:

+

Module import

+

It is necessary to import modules in the package. For example, from .registry import register_model, etc.

+
import numpy as np
+
+import mindspore as ms
+from mindspore import Tensor, nn
+
+from .initializer import initialize_defult #Used to initialize the default parameters of the model, including weight initialization method, BN layer parameters, etc.
+from .model_factory import build_model_from_cfg #Used to build a target detection model according to the parameters in the YAML configuration file and return an instance of the model.
+from .registry import register_model #Used to register a custom model in Mindyolo for use in the YAML configuration file.
+
+#Visibility declaration
+__all__ = ["MYmodel", "mymodel"]
+
+

Create a configuration dictionary

+

The _cfg function is an auxiliary function used to create a configuration dictionary. It accepts a url parameter and other keyword parameters and returns a dictionary containing the url and other parameters.
+default_cfgs is a dictionary used to store default configurations. Here, mymodel is used as the key to create a configuration dictionary using the _cfg function. +

def _cfg(url="", **kwargs):
+    return {"url": url, **kwargs}
+
+default_cfgs = {"mymodel": _cfg(url="")}
+

+

Create a model

+

In MindSpore, the model class inherits from nn.Cell. Generally, the following two functions need to be overridden:

+
    +
  • In the __init__ function, the module layer needed in the model should be defined.
  • +
  • In the construct function, define the model forward logic.
  • +
+
class MYmodel(nn.Cell):
+
+    def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False):
+        super(MYmodel, self).__init__()
+        self.cfg = cfg
+        self.stride = Tensor(np.array(cfg.stride), ms.int32)
+        self.stride_max = int(max(self.cfg.stride))
+        ch, nc = in_channels, num_classes
+
+        self.nc = nc  # override yaml value
+        self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn)
+        self.names = [str(i) for i in range(nc)]  # default names
+
+        initialize_defult()  # Optional: you may need initialize_defult to get the same initialization for conv2d and dense layers as in PyTorch;
+
+    def construct(self, x):
+        return self.model(x)
+
+

Register model (optional)

+

If you need to use the mindyolo interface to initialize a custom model, you need to register and import the model first.

+

Model registration
+

@register_model #The registered model can be accessed by the create_model interface as a model name;
+def mymodel(cfg, in_channels=3, num_classes=None, **kwargs) -> MYmodel:
+    """Get GoogLeNet model.
+    Refer to the base class `models.GoogLeNet` for more details."""
+    model = MYmodel(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+Model import

+
#Add the following code to the mindyolo/models/__init__.py file
+
+from . import mymodel #mymodel.py files are usually placed in the mindyolo/models/ directory
+__all__.extend(mymodel.__all__)
+from .mymodel import *
+
+

Verify main

+

The initial writing phase should ensure that the model is runnable. Basic verification can be performed through the following code block: +First import the required modules and functions. Then, parse the configuration object.

+

if __name__ == "__main__":
+    from mindyolo.models.model_factory import create_model
+    from mindyolo.utils.config import parse_config
+
+    opt = parse_config()
+
+Create a model and specify the related parameters. Note: if you want to create the custom model by name in create_model, you need to register it with the @register_model decorator first; please refer to the Register model (optional) section above. +
    model = create_model(
+        model_name="mymodel",
+        model_cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    )
+

+

Otherwise, import the model class directly:

+

    from mindyolo.models.mymodel import MYmodel
+    # MYmodel takes the parsed network config directly (see the class definition above)
+    model = MYmodel(
+        cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    ) 
+
+Finally, create an input tensor x and pass it to the model for forward computation. +
    x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)
+    out = model(x)
+    out = out[0] if isinstance(out, (list, tuple)) else out
+    print(f"Output shape is {[o.shape for o in out]}")
+

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 00000000..c7bfc2cf --- /dev/null +++ b/index.html @@ -0,0 +1,1379 @@ + + + + + + + + + + + + + + + + + + + + + + Home - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

MindYOLO

+

+ + docs + + + GitHub + + + PRs Welcome + +

+ +

MindYOLO implements state-of-the-art YOLO series algorithms based on MindSpore. +The following table lists the mindyolo versions and the corresponding supported mindspore versions.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
mindyolomindspore
mastermaster
0.42.3.0
0.32.2.10
0.22.0
0.11.8
+

+

Benchmark and Model Zoo

+

See Benchmark Results.

+

Supported model list

+ +

Installation

+

See INSTALLATION for details.

+

Getting Started

+

See QUICK START for details.

+

Notes

+

⚠️ The current version is based on GRAPH mode with static shapes. +Dynamic shape support will be added later. Please stay tuned.

+

How to Contribute

+

We appreciate all contributions including issues and PRs to make MindYOLO better.

+

Please refer to CONTRIBUTING for the contributing guideline.

+

License

+

MindYOLO is released under the Apache License 2.0.

+

Acknowledgement

+

MindYOLO is an open source project that welcomes any contribution and feedback. We hope the toolbox and benchmark can support the growing research community by providing a flexible and standardized toolkit to reimplement existing methods and develop new real-time object detection methods.

+

Citation

+

If you find this project useful in your research, please consider citing:

+
@misc{MindSpore Object Detection YOLO 2023,
+    title={{MindSpore Object Detection YOLO}:MindSpore Object Detection YOLO Toolbox and Benchmark},
+    author={MindSpore YOLO Contributors},
+    howpublished = {\url{https://github.com/mindspore-lab/mindyolo}},
+    year={2023}
+}
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/installation/index.html b/installation/index.html new file mode 100644 index 00000000..6a89a4c6 --- /dev/null +++ b/installation/index.html @@ -0,0 +1,1295 @@ + + + + + + + + + + + + + + + + + + + + + + + + Installation - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Installation

+

Dependency

+
    +
  • mindspore >= 2.3
  • +
  • numpy >= 1.17.0
  • +
  • pyyaml >= 5.3
  • +
  • openmpi 4.0.3 (for distributed mode)
  • +
+

To install the dependency, please run

+
pip install -r requirements.txt
+
+

MindSpore can be easily installed by following the official instructions, where you can select the best fit for your hardware platform. To run in distributed mode, openmpi needs to be installed.

+

⚠️ The current version only supports the Ascend platform, and the GPU platform will be supported later.

+

Install with PyPI

+

MindYOLO is published as a Python package and can be installed with pip, ideally by using a virtual environment. Open up a terminal and install MindYOLO with:

+
pip install mindyolo
+
+
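For example, a minimal sketch of installing into a fresh virtual environment on Linux/macOS (the environment name is illustrative):
+python -m venv mindyolo-env
+source mindyolo-env/bin/activate
+pip install mindyolo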

Install from Source (Bleeding Edge Version)

+

from VCS

+
pip install git+https://github.com/mindspore-lab/mindyolo.git
+
+

from local src

+

As this project is in active development, if you are a developer or contributor, please prefer this installation!

+

MindYOLO can be used directly from GitHub by cloning the repository into a local folder, which might be useful if you want to use the very latest version:

+
git clone https://github.com/mindspore-lab/mindyolo.git
+
+

After cloning from git, it is recommended that you install using "editable" mode, which can help resolve potential module import issues:

+
cd mindyolo
+pip install -e .
+
+

In addition, we provide an optional fast COCO API to improve evaluation speed. The code is provided in C++, and you can try compiling it with the following commands (this operation is optional):

+
cd mindyolo/csrc
+sh build.sh
+
+

We also provide fused GPU operators built upon the MindSpore ops.Custom API. The fused GPU operators can improve training speed. The source code is provided in C++ and CUDA in the folder examples/custom_gpu_op/. To enable this feature in GPU training, modify the method bbox_iou in mindyolo/models/losses/iou_loss.py by referring to the demo script examples/custom_gpu_op/iou_loss_fused.py. Before running iou_loss_fused.py, compile the C++ and CUDA source code into dynamic link libraries with the following commands (this operation is optional):

+
bash examples/custom_gpu_op/fused_op/build.sh
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/benchmark/index.html b/modelzoo/benchmark/index.html new file mode 100644 index 00000000..2883a827 --- /dev/null +++ b/modelzoo/benchmark/index.html @@ -0,0 +1,1668 @@ + + + + + + + + + + + + + + + + + + + + + + + + Benchmark - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Benchmark

+

Detection

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.23.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.611.2Myamlweights
YOLOv8M16 * 8640MS COCO 201750.525.9Myamlweights
YOLOv8L16 * 8640MS COCO 201752.843.7Myamlweights
YOLOv8X16 * 8640MS COCO 201753.768.2Myamlweights
YOLOv7Tiny16 * 8640MS COCO 201737.56.2Myamlweights
YOLOv7L16 * 8640MS COCO 201750.836.9Myamlweights
YOLOv7X12 * 8640MS COCO 201752.471.3Myamlweights
YOLOv5N32 * 8640MS COCO 201727.31.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.67.2Myamlweights
YOLOv5M32 * 8640MS COCO 201744.921.2Myamlweights
YOLOv5L32 * 8640MS COCO 201748.546.5Myamlweights
YOLOv5X16 * 8640MS COCO 201750.586.7Myamlweights
YOLOv4CSPDarknet5316 * 8608MS COCO 201745.427.6Myamlweights
YOLOv4CSPDarknet53(silu)16 * 8608MS COCO 201745.827.6Myamlweights
YOLOv3Darknet5316 * 8640MS COCO 201745.561.9Myamlweights
YOLOXN8 * 8416MS COCO 201724.10.9Myamlweights
YOLOXTiny8 * 8416MS COCO 201733.35.1Myamlweights
YOLOXS8 * 8640MS COCO 201740.79.0Myamlweights
YOLOXM8 * 8640MS COCO 201746.725.3Myamlweights
YOLOXL8 * 8640MS COCO 201749.254.2Myamlweights
YOLOXX8 * 8640MS COCO 201751.699.1Myamlweights
YOLOXDarknet538 * 8640MS COCO 201747.763.7Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.3373.553.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.7365.5311.2Myamlweights
YOLOv7Tiny16 * 8640MS COCO 201737.5496.216.2Myamlweights
YOLOv5N32 * 8640MS COCO 201727.4736.081.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.6787.347.2Myamlweights
YOLOv4CSPDarknet5316 * 8608MS COCO 201746.1337.2527.6Myamlweights
YOLOv3Darknet5316 * 8640MS COCO 201746.6396.6061.9Myamlweights
YOLOXS8 * 8640MS COCO 201741.0242.159.0Myamlweights
+
+

Segmentation

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)Mask mAP (%)ParamsRecipeDownload
YOLOv8-segX16 * 8640MS COCO 201752.542.971.8Myamlweights
+
+

Deploy inference

+ +

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolov3/index.html b/modelzoo/yolov3/index.html new file mode 100644 index 00000000..27b2a7b2 --- /dev/null +++ b/modelzoo/yolov3/index.html @@ -0,0 +1,1483 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv3 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv3

+
+

YOLOv3: An Incremental Improvement

+
+

Abstract

+

We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv3Darknet5316 * 8640MS COCO 201745.561.9Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv3Darknet5316 * 8640MS COCO 201746.6396.6061.9Myamlweights
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We referred to a commonly used third-party YOLOv3 implementation.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Pretraining Model

+

You can get the pre-training model from here.

+

To convert it to a ckpt file loadable by mindyolo, put it in the root directory and then run: +

python mindyolo/utils/convert_weight_darknet53.py
+

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+
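For example, a minimal sketch of the linear scaling rule mentioned above (the base values are illustrative; in practice the base learning rate and per-device batch size come from the chosen yaml config):
+base_global_bs = 16 * 8   # per-device batch size x number of devices used in the recipe
+new_global_bs = 16 * 1    # e.g. a single-card run
+base_lr = 0.01            # illustrative value from the config
+new_lr = base_lr * new_global_bs / base_global_bs
+print(new_lr)  # 0.00125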

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. YOLOv3 release v9.1. https://github.com/ultralytics/yolov3/releases/tag/v9.1, 2021. +[2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolov4/index.html b/modelzoo/yolov4/index.html new file mode 100644 index 00000000..3bd812a6 --- /dev/null +++ b/modelzoo/yolov4/index.html @@ -0,0 +1,1526 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv4 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv4

+
+

YOLOv4: Optimal Speed and Accuracy of Object Detection

+
+

Abstract

+

There are a huge number of features which are said to +improve Convolutional Neural Network (CNN) accuracy. +Practical testing of combinations of such features on large +datasets, and theoretical justification of the result, is required. Some features operate on certain models exclusively +and for certain problems exclusively, or only for small-scale +datasets; while some features, such as batch-normalization +and residual-connections, are applicable to the majority of +models, tasks, and datasets. We assume that such universal +features include Weighted-Residual-Connections (WRC), +Cross-Stage-Partial-connections (CSP), Cross mini-Batch +Normalization (CmBN), Self-adversarial-training (SAT) +and Mish-activation. We use new features: WRC, CSP, +CmBN, SAT, Mish activation, Mosaic data augmentation, +CmBN, DropBlock regularization, and CIoU loss, and combine some of them to achieve state-of-the-art results: 43.5% +AP (65.7% AP50) for the MS COCO dataset at a realtime speed of 65 FPS on Tesla V100.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv4CSPDarknet5316 * 8608MS COCO 201745.427.6Myamlweights
YOLOv4CSPDarknet53(silu)16 * 8608MS COCO 201745.827.6Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv4CSPDarknet5316 * 8608MS COCO 201746.1337.2527.6Myamlweights
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Pretraining Model

+

You can get the pre-training model trained on ImageNet2012 from here.

+

To convert it to a ckpt file loadable by mindyolo, put it in the root directory and then run: +

python mindyolo/utils/convert_weight_cspdarknet53.py
+

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Notes

+
    +
  • As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.
  • +
  • If the following warning occurs, setting the environment variable PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' will fix it. +
    multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache))
    +
  • +
+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolov5/index.html b/modelzoo/yolov5/index.html new file mode 100644 index 00000000..21978c7f --- /dev/null +++ b/modelzoo/yolov5/index.html @@ -0,0 +1,1516 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv5 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv5

+

Abstract

+

YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, representing Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv5N32 * 8640MS COCO 201727.31.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.67.2Myamlweights
YOLOv5M32 * 8640MS COCO 201744.921.2Myamlweights
YOLOv5L32 * 8640MS COCO 201748.546.5Myamlweights
YOLOv5X16 * 8640MS COCO 201750.586.7Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv5N32 * 8640MS COCO 201727.4736.081.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.6787.347.2Myamlweights
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV5 to reproduce the P5 series model, and the differences are as follows: We use 8x NPU(Ascend910) for training, and the single-NPU batch size is 32. This is different from the official code.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. YOLOv5 release v6.1. https://github.com/ultralytics/yolov5/releases/tag/v6.1, 2022.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolov7/index.html b/modelzoo/yolov7/index.html new file mode 100644 index 00000000..631b8f07 --- /dev/null +++ b/modelzoo/yolov7/index.html @@ -0,0 +1,1486 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv7 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv7

+
+

YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors

+
+

Abstract

+

YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9% AP) by 509% in speed and 2% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2% AP) by 551% in speed and 0.7% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv7Tiny16 * 8640MS COCO 201737.56.2Myamlweights
YOLOv7L16 * 8640MS COCO 201750.836.9Myamlweights
YOLOv7X12 * 8640MS COCO 201752.471.3Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv7Tiny16 * 8640MS COCO 201737.5496.216.2Myamlweights
+
+


+

Notes

+
    +
  • Context: Training context denoted as {device}x{pieces}-{MS mode}, where mindspore mode can be G - graph mode or F - pynative mode with ms function. For example, D910x8-G is for training on 8 pieces of Ascend 910 NPU using graph mode.
  • +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV7 to reproduce the P5 series model, and the differences are as follows: We use 8x NPU(Ascend910) for training, and the single-NPU batch size for tiny/l/x is 16/16/12. This is different from the official code.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolov8/index.html b/modelzoo/yolov8/index.html new file mode 100644 index 00000000..ae968b9b --- /dev/null +++ b/modelzoo/yolov8/index.html @@ -0,0 +1,1578 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv8 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv8

+

Abstract

+

Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation and image classification tasks.

+
+ +
+ +

Results

+

Detection

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.23.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.611.2Myamlweights
YOLOv8M16 * 8640MS COCO 201750.525.9Myamlweights
YOLOv8L16 * 8640MS COCO 201752.843.7Myamlweights
YOLOv8X16 * 8640MS COCO 201753.768.2Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.3373.553.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.7365.5311.2Myamlweights
+
+

Segmentation

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)Mask mAP (%)ParamsRecipeDownload
YOLOv8-segX16 * 8640MS COCO 201752.542.971.8Myamlweights
+
+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOV8 to reproduce the P5 series model.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

+
# standalone training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Jocher Glenn. Ultralytics YOLOv8. https://github.com/ultralytics/ultralytics, 2023.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/modelzoo/yolox/index.html b/modelzoo/yolox/index.html new file mode 100644 index 00000000..25aaee66 --- /dev/null +++ b/modelzoo/yolox/index.html @@ -0,0 +1,1526 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOx - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOX

+

Abstract

+

YOLOX is a new high-performance detector with some experienced improvements to YOLO series. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model.

+
+ +
+ +

Results

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOXN8 * 8416MS COCO 201724.10.9Myamlweights
YOLOXTiny8 * 8416MS COCO 201733.35.1Myamlweights
YOLOXS8 * 8640MS COCO 201740.79.0Myamlweights
YOLOXM8 * 8640MS COCO 201746.725.3Myamlweights
YOLOXL8 * 8640MS COCO 201749.254.2Myamlweights
YOLOXX8 * 8640MS COCO 201751.699.1Myamlweights
YOLOXDarknet538 * 8640MS COCO 201747.763.7Myamlweights
+
+
+performance tested on Ascend 910*(8p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOXS8 * 8640MS COCO 201741.0242.159.0Myamlweights
+
+


+

Notes

+
    +
  • Box mAP: Accuracy reported on the validation set.
  • +
  • We refer to the official YOLOX to reproduce the results.
  • +
+

Quick Start

+

Please refer to the QUICK START in MindYOLO for details.

+

Training

+

- Distributed Training

+

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True
+

+
+

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

+
+

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

+

For detailed illustration of all hyper-parameters, please refer to config.py.

+

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction.

+

- Standalone Training

+

If you want to train or finetune the model on a smaller dataset without distributed training, please first run:

+
# standalone 1st stage training on a CPU/GPU/Ascend device
+python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend
+
+

Validation and Test

+

To validate the accuracy of the trained model, you can use test.py and parse the checkpoint path with --weight.

+
python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

Deployment

+

See here.

+

References

+ +

[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. https://arxiv.org/abs/2107.08430, 2021.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/notes/changelog/index.html b/notes/changelog/index.html new file mode 100644 index 00000000..83acc9a8 --- /dev/null +++ b/notes/changelog/index.html @@ -0,0 +1,1140 @@ + + + + + + + + + + + + + + + + + + + + + + + + Change Log - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Change Log

+

Coming soon.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/notes/code_of_conduct/index.html b/notes/code_of_conduct/index.html new file mode 100644 index 00000000..f6b888a6 --- /dev/null +++ b/notes/code_of_conduct/index.html @@ -0,0 +1,1140 @@ + + + + + + + + + + + + + + + + + + + + + + + + Code of Conduct - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Code of Conduct

+

Coming soon.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/notes/contributing/index.html b/notes/contributing/index.html new file mode 100644 index 00000000..8559988b --- /dev/null +++ b/notes/contributing/index.html @@ -0,0 +1,1387 @@ + + + + + + + + + + + + + + + + + + + + + + + + Contributing - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

MindYOLO contributing guidelines

+

Contributor License Agreement

+

You are required to sign the CLA before your first code submission to the MindYOLO community.

+

For individual contributors, please refer to the ICLA online document for detailed information.

+

Getting Started

+ +

Contribution Workflow


Code style


Please follow this style to make MindYOLO easy to review, maintain and develop.

  • Coding guidelines

    The Python coding style suggested by the Python PEP 8 Coding Style and the C++ coding style suggested by the Google C++ Coding Guidelines are used in the MindYOLO community. CppLint, CppCheck, CMakeLint, CodeSpell, Lizard, ShellCheck and PyLint are used to check the format of the code; installing these plugins in your IDE is recommended.

  • Unittest guidelines

    The Python unittest style suggested by pytest and the C++ unittest style suggested by Googletest Primer are used in the MindYOLO community. The design intent of a test case should be reflected in its name or comment.

  • Refactoring guidelines

    We encourage developers to refactor our code to eliminate code smells. All code should conform to the coding style and testing style, and refactored code is no exception. The Lizard thresholds are 100 for nloc (lines of code without comments) and 20 for cnc (cyclomatic complexity number); when you receive a Lizard warning, you have to refactor the code you want to merge.

  • Document guidelines

    We use MarkdownLint to check the format of markdown documents. MindYOLO CI modifies the following rules based on the default configuration:

    • MD007 (unordered list indentation): The indent parameter is set to 4, indicating that all content in an unordered list needs to be indented by four spaces.
    • MD009 (spaces at the line end): The br_spaces parameter is set to 2, indicating that there can be 0 or 2 spaces at the end of a line.
    • MD029 (sequence numbers of an ordered list): The style parameter is set to ordered, indicating that the sequence numbers of the ordered list are in ascending order.

    For details, please refer to RULES.

Fork-Pull development model

  • Fork MindYOLO repository

    Before submitting code to the MindYOLO project, please make sure that this project has been forked to your own repository. This means that there will be parallel development between the MindYOLO repository and your own repository, so be careful to avoid inconsistency between them.

  • Clone the remote repository

    If you want to download the code to your local machine, git is the best way:

    # For GitHub
    git clone https://github.com/{insert_your_forked_repo}/mindyolo.git
    git remote add upstream https://github.com/mindspore-lab/mindyolo.git

  • Develop code locally

    To avoid inconsistency between multiple branches, checking out a new branch is SUGGESTED:

    git checkout -b {new_branch_name} origin/master

    Taking the master branch as an example, MindYOLO may create version branches and downstream development branches as needed; please fix bugs upstream first. Then you can modify the code freely.

  • Push the code to the remote repository

    After updating the code, push the update in the formal way:

    git add .
    git status # Check the update status
    git commit -m "Your commit title"
    git commit -s --amend # Add the concrete description of your commit
    git push origin {new_branch_name}

  • Pull a request to MindYOLO repository

    In the last step, you need to open a pull request comparing your new branch with the MindYOLO master branch. After the pull request is created, the Jenkins CI will be automatically set up for build testing. Your pull request should be merged into the upstream master branch as soon as possible to reduce the risk of merging.

Report issues


A great way to contribute to the project is to send a detailed report when you encounter an issue. We always appreciate a well-written, thorough bug report, and will thank you for it!


When reporting issues, refer to this format:

  • What version of the environment (MindSpore, OS, Python, MindYOLO, etc.) are you using?
  • Is this a BUG REPORT or a FEATURE REQUEST?
  • What kind of issue is it? Add labels to highlight it on the issue dashboard.
  • What happened?
  • What did you expect to happen?
  • How to reproduce it? (as minimally and precisely as possible)
  • Special notes for your reviewers?

Issues advisory:

  • If you find an unclosed issue which is exactly what you are going to solve, please leave a comment on that issue to tell others you will be in charge of it.
  • If an issue has been open for a while, it is recommended that contributors check its status before working on solving it.
  • If you resolve an issue that you reported yourself, you are also required to let others know before closing it.
  • If you want the issue to be responded to as quickly as possible, please try to label it; you can find the available labels on the Label List.

Propose PRs

  • Raise your idea as an issue on GitHub.
  • If it is a new feature that needs lots of design details, a design proposal should also be submitted.
  • After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR.
  • No PR may be merged until it receives 2+ LGTMs from approvers. Please NOTE that an approver is NOT allowed to add an LGTM to their own PR.
  • After the PR is sufficiently discussed, it will be merged, abandoned or rejected depending on the outcome of the discussion.

PRs advisory:

  • Any irrelevant changes should be avoided.
  • Make sure your commit history is ordered.
  • Always keep your branch up to date with the master branch.
  • For bug-fix PRs, make sure all related issues are linked.
diff --git a/notes/faq/index.html b/notes/faq/index.html
new file mode 100644

diff --git a/objects.inv b/objects.inv
new file mode 100644
# Sphinx inventory version 2
# Project: MindYOLO Docs
# Version: 0.0.0
# The remainder of this file is compressed using zlib.

diff --git a/reference/data/index.html b/reference/data/index.html
new file mode 100644

Data


Data Loader


mindyolo.data.loader.create_loader(dataset, batch_collate_fn, column_names_getitem, column_names_collate, batch_size, epoch_size=1, rank=0, rank_size=1, num_parallel_workers=8, shuffle=True, drop_remainder=False, python_multiprocessing=False)


Creates dataloader.


Applies operations such as transform and batch to the ms.dataset.Dataset object created by the create_dataset function to get the dataloader.

PARAMETERS:

  • dataset (COCODataset): dataset object created by create_dataset.
  • batch_size (int or function): The number of rows each batch is created with. An int or a callable object which takes exactly 1 parameter, BatchInfo.
  • drop_remainder (bool, default=False): Determines whether to drop the last block whose data row number is less than the batch size. If True, and if there are fewer than batch_size rows available to make the last batch, those rows will be dropped and not propagated to the child node.
  • num_parallel_workers (int, default=8): Number of workers (threads) used to process the dataset in parallel.
  • python_multiprocessing (bool, default=False): Parallelize Python operations with multiple worker processes. This option can be beneficial if the Python operation is computationally heavy.

RETURNS:

  BatchDataset, dataset batched.
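Note that batch_size may also be a callable. A minimal sketch follows, assuming MindSpore's BatchInfo counters; the growth schedule itself is purely illustrative:

# A callable batch size receives a BatchInfo object and must return an int.
def dynamic_batch_size(batch_info):
    # Illustrative schedule: grow the per-batch row count by 16 every epoch.
    return 16 + 16 * batch_info.get_epoch_num()

Such a function can be passed as batch_size to create_loader in place of a fixed integer.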
+ Source code in mindyolo/data/loader.py +
def create_loader(
+    dataset,
+    batch_collate_fn,
+    column_names_getitem,
+    column_names_collate,
+    batch_size,
+    epoch_size=1,
+    rank=0,
+    rank_size=1,
+    num_parallel_workers=8,
+    shuffle=True,
+    drop_remainder=False,
+    python_multiprocessing=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (COCODataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+    de.config.set_seed(1236517205 + rank)
+    cores = multiprocessing.cpu_count()
+    num_parallel_workers = min(int(cores / rank_size), num_parallel_workers)
+    logger.info(f"Dataloader num parallel workers: [{num_parallel_workers}]")
+    if rank_size > 1:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(8, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+            num_shards=rank_size,
+            shard_id=rank,
+        )
+    else:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(32, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+        )
+    ds = ds.batch(
+        batch_size, per_batch_map=batch_collate_fn,
+        input_columns=column_names_getitem, output_columns=column_names_collate, drop_remainder=drop_remainder
+    )
+    ds = ds.repeat(epoch_size)
+
+    return ds
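A minimal usage sketch follows. It assumes a COCODataset built as in the section below; the collate method name (train_collate_fn) and the concrete argument values are assumptions for illustration, not a prescribed recipe.

from mindyolo.data.dataset import COCODataset
from mindyolo.data.loader import create_loader

# Illustrative only: in practice these arguments come from the YAML config.
transforms = []   # placeholder: plug in the transform list from your config
dataset = COCODataset(dataset_path="./coco/train2017.txt", img_size=640,
                      transforms_dict=transforms, is_training=True, batch_size=16)

loader = create_loader(
    dataset=dataset,
    batch_collate_fn=dataset.train_collate_fn,           # assumed collate method name; adapt to your code base
    column_names_getitem=dataset.column_names_getitem,   # ['samples']
    column_names_collate=dataset.column_names_collate,   # ['images', 'labels'] when is_training=True
    batch_size=16,
    epoch_size=1,
    rank=0, rank_size=1,                                 # set from the distributed context to shard across devices
    shuffle=True,
    drop_remainder=True,
)
steps_per_epoch = loader.get_dataset_size()              # number of batches produced per epoch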

Dataset

mindyolo.data.dataset.COCODataset

Load the COCO dataset (yolo format coco labels)

PARAMETERS:

  • dataset_path (str, default=''): dataset label directory for the dataset, for example:

        COCO_ROOT
            ├── train2017.txt
            ├── annotations
            │     └── instances_train2017.json
            ├── images
            │     └── train2017
            │             ├── 000000000001.jpg
            │             └── 000000000002.jpg
            └── labels
                  └── train2017
                          ├── 000000000001.txt
                          └── 000000000002.txt

        dataset_path (str): ./coco/train2017.txt

  • transforms (list): A list of image data augmentation operations that are applied to the dataset objects in order.
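A minimal construction sketch, assuming the COCO-style layout above already exists on disk; the transform entries merely follow the func_name/keyword-argument convention consumed by __getitem__ and are illustrative, not a recommended pipeline.

from mindyolo.data.dataset import COCODataset

# Each transform entry names a dataset method ("func_name") plus its keyword arguments.
transforms = [
    {"func_name": "resample_segments"},   # illustrative choice
    {"func_name": "letterbox"},           # pads/resizes to img_size inside __getitem__
]

dataset = COCODataset(
    dataset_path="./coco/train2017.txt",  # txt file listing the image paths, as in the tree above
    img_size=640,
    transforms_dict=transforms,
)
sample = dataset[0]                       # dict with 'img', 'cls', 'bboxes', ... after the transforms
print(sample["img"].shape)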
+ Source code in mindyolo/data/dataset.py +
class COCODataset:
+    """
+    Load the COCO dataset (yolo format coco labels)
+
+    Args:
+        dataset_path (str): dataset label directory for dataset.
+        for example:
+            COCO_ROOT
+                ├── train2017.txt
+                ├── annotations
+                │     └── instances_train2017.json
+                ├── images
+                │     └── train2017
+                │             ├── 000000000001.jpg
+                │             └── 000000000002.jpg
+                └── labels
+                      └── train2017
+                              ├── 000000000001.txt
+                              └── 000000000002.txt
+            dataset_path (str): ./coco/train2017.txt
+        transforms (list): A list of images data enhancements
+            that apply data enhancements on data set objects in order.
+    """
+
+    def __init__(
+        self,
+        dataset_path="",
+        img_size=640,
+        transforms_dict=None,
+        is_training=False,
+        augment=False,
+        rect=False,
+        single_cls=False,
+        batch_size=32,
+        stride=32,
+        num_cls=80,
+        pad=0.0,
+        return_segments=False,  # for segment
+        return_keypoints=False, # for keypoint
+        nkpt=0,                 # for keypoint
+        ndim=0                  # for keypoint
+    ):
+        # acceptable image suffixes
+        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']
+        self.cache_version = 0.2
+
+        self.return_segments = return_segments
+        self.return_keypoints = return_keypoints
+        assert not (return_segments and return_keypoints), 'Can not return both segments and keypoints.'
+
+        self.path = dataset_path
+        self.img_size = img_size
+        self.augment = augment
+        self.rect = rect
+        self.stride = stride
+        self.num_cls = num_cls
+        self.nkpt = nkpt
+        self.ndim = ndim
+        self.transforms_dict = transforms_dict
+        self.is_training = is_training
+
+        # set column names
+        self.column_names_getitem = ['samples']
+        if self.is_training:
+            self.column_names_collate = ['images', 'labels']
+            if self.return_segments:
+                self.column_names_collate = ['images', 'labels', 'masks']
+            elif self.return_keypoints:
+                self.column_names_collate = ['images', 'labels', 'keypoints']
+        else:
+            self.column_names_collate = ["images", "img_files", "hw_ori", "hw_scale", "pad"]
+
+        try:
+            f = []  # image files
+            for p in self.path if isinstance(self.path, list) else [self.path]:
+                p = Path(p)  # os-agnostic
+                if p.is_dir():  # dir
+                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
+                elif p.is_file():  # file
+                    with open(p, "r") as t:
+                        t = t.read().strip().splitlines()
+                        parent = str(p.parent) + os.sep
+                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
+                else:
+                    raise Exception(f"{p} does not exist")
+            self.img_files = sorted([x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in self.img_formats])
+            assert self.img_files, f"No images found"
+        except Exception as e:
+            raise Exception(f"Error loading data from {self.path}: {e}\n")
+
+        # Check cache
+        self.label_files = self._img2label_paths(self.img_files)  # labels
+        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache.npy")  # cached labels
+        if cache_path.is_file():
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+            if cache["version"] == self.cache_version \
+                    and cache["hash"] == self._get_hash(self.label_files + self.img_files):
+                logger.info(f"Dataset Cache file hash/version check success.")
+                logger.info(f"Load dataset cache from [{cache_path}] success.")
+            else:
+                logger.info(f"Dataset cache file hash/version check fail.")
+                logger.info(f"Datset caching now...")
+                cache, exists = self.cache_labels(cache_path), False  # cache
+                logger.info(f"Dataset caching success.")
+        else:
+            logger.info(f"No dataset cache available, caching now...")
+            cache, exists = self.cache_labels(cache_path), False  # cache
+            logger.info(f"Dataset caching success.")
+
+        # Display cache
+        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupted, total
+        if exists:
+            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+            tqdm(None, desc=d, total=n, initial=n)  # display cache results
+        assert nf > 0 or not augment, f"No labels in {cache_path}. Can not train without labels."
+
+        # Read cache
+        cache.pop("hash")  # remove hash
+        cache.pop("version")  # remove version
+        self.labels = cache['labels']
+        self.img_files = [lb['im_file'] for lb in self.labels]  # update im_files
+
+        # Check if the dataset is all boxes or all segments
+        lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in self.labels)
+        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
+        if len_segments and len_boxes != len_segments:
+            print(
+                f'WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, '
+                f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. '
+                'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.')
+            for lb in self.labels:
+                lb['segments'] = []
+        if len_cls == 0:
+            raise ValueError(f'All labels empty in {cache_path}, can not start training without labels.')
+
+        if single_cls:
+            for x in self.labels:
+                x['cls'][:, 0] = 0
+
+        n = len(self.labels)  # number of images
+        bi = np.floor(np.arange(n) / batch_size).astype(np.int_)  # batch index
+        nb = bi[-1] + 1  # number of batches
+        self.batch = bi  # batch index of image
+
+        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
+        self.imgs, self.img_hw_ori, self.indices = None, None, range(n)
+
+        # Rectangular Train/Test
+        if self.rect:
+            # Sort by aspect ratio
+            s = self.img_shapes  # wh
+            ar = s[:, 1] / s[:, 0]  # aspect ratio
+            irect = ar.argsort()
+            self.img_files = [self.img_files[i] for i in irect]
+            self.label_files = [self.label_files[i] for i in irect]
+            self.labels = [self.labels[i] for i in irect]
+            self.img_shapes = s[irect]  # wh
+            ar = ar[irect]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int_) * stride
+
+        self.imgIds = [int(Path(im_file).stem) for im_file in self.img_files]
+
+    def cache_labels(self, path=Path("./labels.cache.npy")):
+        # Cache dataset labels, check images and read shapes
+        x = {'labels': []}  # dict
+        nm, nf, ne, nc, segments, keypoints = 0, 0, 0, 0, [], None  # number missing, found, empty, duplicate
+        pbar = tqdm(zip(self.img_files, self.label_files), desc="Scanning images", total=len(self.img_files))
+        if self.return_keypoints and (self.nkpt <= 0 or self.ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
+        for i, (im_file, lb_file) in enumerate(pbar):
+            try:
+                # verify images
+                im = Image.open(im_file)
+                im.verify()  # PIL verify
+                shape = self._exif_size(im)  # image size
+                segments = []  # instance segments
+                assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
+                assert im.format.lower() in self.img_formats, f"invalid image format {im.format}"
+
+                # verify labels
+                if os.path.isfile(lb_file):
+                    nf += 1  # label found
+                    with open(lb_file, "r") as f:
+                        lb = [x.split() for x in f.read().strip().splitlines()]
+                        if any([len(x) > 6 for x in lb]) and (not self.return_keypoints):  # is segment
+                            classes = np.array([x[0] for x in lb], dtype=np.float32)
+                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
+                            lb = np.concatenate(
+                                (classes.reshape(-1, 1), segments2boxes(segments)), 1
+                            )  # (cls, xywh)
+                        lb = np.array(lb, dtype=np.float32)
+                    nl = len(lb)
+                    if nl:
+                        if self.return_keypoints:
+                            assert lb.shape[1] == (5 + self.nkpt * self.ndim), \
+                                f'labels require {(5 + self.nkpt * self.ndim)} columns each'
+                            assert (lb[:, 5::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                            assert (lb[:, 6::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                        else:
+                            assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
+                            assert (lb[:, 1:] <= 1).all(), \
+                                f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
+                            assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
+                        # All labels
+                        max_cls = int(lb[:, 0].max())  # max label count
+                        assert max_cls <= self.num_cls, \
+                            f'Label class {max_cls} exceeds dataset class count {self.num_cls}. ' \
+                            f'Possible class labels are 0-{self.num_cls - 1}'
+                        _, j = np.unique(lb, axis=0, return_index=True)
+                        if len(j) < nl:  # duplicate row check
+                            lb = lb[j]  # remove duplicates
+                            if segments:
+                                segments = [segments[x] for x in i]
+                            print(f'WARNING ⚠️ {im_file}: {nl - len(j)} duplicate labels removed')
+                    else:
+                        ne += 1  # label empty
+                        lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                            if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                else:
+                    nm += 1  # label missing
+                    lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                        if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                if self.return_keypoints:
+                    keypoints = lb[:, 5:].reshape(-1, self.nkpt, self.ndim)
+                    if self.ndim == 2:
+                        kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
+                        kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
+                        kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
+                        keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
+                lb = lb[:, :5]
+                x['labels'].append(
+                    dict(
+                        im_file=im_file,
+                        cls=lb[:, 0:1],     # (n, 1)
+                        bboxes=lb[:, 1:],   # (n, 4)
+                        segments=segments,  # list of (mi, 2)
+                        keypoints=keypoints,
+                        bbox_format='xywhn',
+                        segment_format='polygon'
+                    )
+                )
+            except Exception as e:
+                nc += 1
+                print(f"WARNING: Ignoring corrupted image and/or label {im_file}: {e}")
+
+            pbar.desc = f"Scanning '{path.parent / path.stem}' images and labels... " \
+                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+        pbar.close()
+
+        if nf == 0:
+            print(f"WARNING: No labels found in {path}.")
+
+        x["hash"] = self._get_hash(self.label_files + self.img_files)
+        x["results"] = nf, nm, ne, nc, len(self.img_files)
+        x["version"] = self.cache_version  # cache version
+        np.save(path, x)  # save for next time
+        logger.info(f"New cache created: {path}")
+        return x
+
+    def __getitem__(self, index):
+        sample = self.get_sample(index)
+
+        for _i, ori_trans in enumerate(self.transforms_dict):
+            _trans = ori_trans.copy()
+            func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+            if func_name == 'copy_paste':
+                sample = self.copy_paste(sample, prob)
+            elif random.random() < prob:
+                if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                    self.albumentations = Albumentations(size=self.img_size, **_trans)
+                if func_name == "letterbox":
+                    new_shape = self.img_size if not self.rect else self.batch_shapes[self.batch[index]]
+                    sample = self.letterbox(sample, new_shape, **_trans)
+                else:
+                    sample = getattr(self, func_name)(sample, **_trans)
+
+        sample['img'] = np.ascontiguousarray(sample['img'])
+        return sample
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def get_sample(self, index):
+        """Get and return label information from the dataset."""
+        sample = deepcopy(self.labels[index])
+        if self.imgs is None:
+            path = self.img_files[index]
+            img = cv2.imread(path)  # BGR
+            assert img is not None, "Image Not Found " + path
+            h_ori, w_ori = img.shape[:2]  # orig hw
+            r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+            if r != 1:  # always resize down, only resize up if training with augmentation
+                interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+                img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+            sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+        else:
+            sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+        return sample
+
+    def mosaic(
+        self,
+        sample,
+        mosaic9_prob=0.0,
+        post_transform=None,
+    ):
+        segment_format = sample['segment_format']
+        bbox_format = sample['bbox_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        mosaic9_prob = min(1.0, max(mosaic9_prob, 0.0))
+        if random.random() < (1 - mosaic9_prob):
+            sample = self._mosaic4(sample)
+        else:
+            sample = self._mosaic9(sample)
+
+        if post_transform:
+            for _i, ori_trans in enumerate(post_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                sample = getattr(self, func_name)(sample, **_trans)
+
+        return sample
+
+    def _mosaic4(self, sample):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=3)  # 3 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # box and cls
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        sample['img'] = img4
+        sample['cls'] = classes4
+        sample['bboxes'] = bboxes4
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments4
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def _mosaic9(self, sample):
+        # loads images in a 9-mosaic
+        classes9, bboxes9, segments9 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=8)  # 8 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img9
+            if i == 0:  # center
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                h0, w0 = h, w
+                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
+            elif i == 1:  # top
+                c = s, s - h, s + w, s
+            elif i == 2:  # top right
+                c = s + wp, s - h, s + wp + w, s
+            elif i == 3:  # right
+                c = s + w0, s, s + w0 + w, s + h
+            elif i == 4:  # bottom right
+                c = s + w0, s + hp, s + w0 + w, s + hp + h
+            elif i == 5:  # bottom
+                c = s + w0 - w, s + h0, s + w0, s + h0 + h
+            elif i == 6:  # bottom left
+                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
+            elif i == 7:  # left
+                c = s - w, s + h0 - h, s, s + h0
+            elif i == 8:  # top left
+                c = s - w, s + h0 - hp - h, s, s + h0 - hp
+
+            padx, pady = c[:2]
+            x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords
+
+            # box and cls
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padx, pady)  # normalized xywh to pixel xyxy format
+            classes9.append(cls)
+            bboxes9.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
+                segments9.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padx, pady)
+                segments9.append(segments)
+
+            # Image
+            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
+            hp, wp = h, w  # height, width previous
+
+        # Offset
+        yc, xc = [int(random.uniform(0, s)) for _ in mosaic_border]  # mosaic center x, y
+        img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s]
+
+        # Concat/clip labels
+        classes9 = np.concatenate(classes9, 0)
+        bboxes9 = np.concatenate(bboxes9, 0)
+        bboxes9[:, [0, 2]] -= xc
+        bboxes9[:, [1, 3]] -= yc
+        bboxes9 = bboxes9.clip(0, 2 * s)
+
+        if segments_is_list:
+            c = np.array([xc, yc])  # centers
+            segments9 = [x - c for x in segments9]
+            for x in segments9:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments9 = np.concatenate(segments9, 0)
+            segments9[..., 0] -= xc
+            segments9[..., 1] -= yc
+            segments9 = segments9.clip(0, 2 * s)
+
+        sample['img'] = img9
+        sample['cls'] = classes9
+        sample['bboxes'] = bboxes9
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments9
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def resample_segments(self, sample, n=1000):
+        segment_format = sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format is should be polygon, but got {segment_format}'
+
+        segments = sample['segments']
+        if len(segments) > 0:
+            # Up-sample an (n,2) segment
+            for i, s in enumerate(segments):
+                s = np.concatenate((s, s[0:1, :]), axis=0)
+                x = np.linspace(0, len(s) - 1, n)
+                xp = np.arange(len(s))
+                segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy
+            segments = np.stack(segments, axis=0)
+        else:
+            segments = np.zeros((0, 1000, 2), dtype=np.float32)
+        sample['segments'] = segments
+        return sample
+
+    def copy_paste(self, sample, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        segments = sample['segments']
+
+        n = len(segments)
+        if probability and n:
+            h, w, _ = img.shape  # height, width, channels
+            im_new = np.zeros(img.shape, np.uint8)
+            for j in random.sample(range(n), k=round(probability * n)):
+                c, l, s = cls[j], bboxes[j], segments[j]
+                box = w - l[2], l[1], w - l[0], l[3]
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+                if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                    cls = np.concatenate((cls, [c]), 0)
+                    bboxes = np.concatenate((bboxes, [box]), 0)
+                    if isinstance(segments, list):
+                        segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                    else:
+                        segments = np.concatenate((segments, [np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)]), 0)
+                    cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+
+            result = cv2.bitwise_and(src1=img, src2=im_new)
+            result = cv2.flip(result, 1)  # augment segments (flip left-right)
+            i = result > 0  # pixels to replace
+            img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+
+        sample['img'] = img
+        sample['cls'] = cls
+        sample['bboxes'] = bboxes
+        sample['segments'] = segments
+
+        return sample
+
+    def random_perspective(
+            self, sample, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0)
+    ):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        targets = sample['bboxes']
+        segments = sample['segments']
+        assert isinstance(segments, np.ndarray), f"segments type expect numpy.ndarray, but got {type(segments)}; " \
+                                                 f"maybe you should resample_segments before that."
+
+        border = sample.pop('mosaic_border', border)
+        height = img.shape[0] + border[0] * 2  # shape(h,w,c)
+        width = img.shape[1] + border[1] * 2
+
+        # Center
+        C = np.eye(3)
+        C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
+        C[1, 2] = -img.shape[0] / 2  # y translation (pixels)
+
+        # Perspective
+        P = np.eye(3)
+        P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+        P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+
+        # Rotation and Scale
+        R = np.eye(3)
+        a = random.uniform(-degrees, degrees)
+        s = random.uniform(1 - scale, 1 + scale)
+        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+        # Shear
+        S = np.eye(3)
+        S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+        S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+
+        # Translation
+        T = np.eye(3)
+        T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
+        T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
+
+        # Combined rotation matrix
+        M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+        if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+            if perspective:
+                img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
+            else:  # affine
+                img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+        # Transform label coordinates
+        n = len(targets)
+        if n:
+            use_segments = len(segments)
+            new_bboxes = np.zeros((n, 4))
+            if use_segments:  # warp segments
+                point_num = segments[0].shape[0]
+                new_segments = np.zeros((n, point_num, 2))
+                for i, segment in enumerate(segments):
+                    xy = np.ones((len(segment), 3))
+                    xy[:, :2] = segment
+                    xy = xy @ M.T  # transform
+                    xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+
+                    # clip
+                    new_segments[i] = xy
+                    new_bboxes[i] = segment2box(xy, width, height)
+
+            else:  # warp boxes
+                xy = np.ones((n * 4, 3))
+                xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+                xy = xy @ M.T  # transform
+                xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+
+                # create new boxes
+                x = xy[:, [0, 2, 4, 6]]
+                y = xy[:, [1, 3, 5, 7]]
+                new_bboxes = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+                # clip
+                new_bboxes[:, [0, 2]] = new_bboxes[:, [0, 2]].clip(0, width)
+                new_bboxes[:, [1, 3]] = new_bboxes[:, [1, 3]].clip(0, height)
+
+            # filter candidates
+            i = box_candidates(box1=targets.T * s, box2=new_bboxes.T, area_thr=0.01 if use_segments else 0.10)
+
+            cls = cls[i]
+            targets = new_bboxes[i]
+            sample['cls'] = cls
+            sample['bboxes'] = targets
+            if use_segments:
+                sample['segments'] = segments[i]
+
+        sample['img'] = img
+
+        return sample
+
+    def mixup(self, sample, alpha=32.0, beta=32.0, pre_transform=None):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        index = random.choices(self.indices, k=1)[0]
+        sample2 = self.get_sample(index)
+        if pre_transform:
+            for _i, ori_trans in enumerate(pre_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                if func_name == 'copy_paste':
+                    sample2 = self.copy_paste(sample2, prob)
+                elif random.random() < prob:
+                    if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                        self.albumentations = Albumentations(size=self.img_size, **_trans)
+                    sample2 = getattr(self, func_name)(sample2, **_trans)
+
+        assert isinstance(sample['segments'], np.ndarray), \
+            f"MixUp: sample segments type expect numpy.ndarray, but got {type(sample['segments'])}; " \
+            f"maybe you should resample_segments before that."
+        assert isinstance(sample2['segments'], np.ndarray), \
+            f"MixUp: sample2 segments type expect numpy.ndarray, but got {type(sample2['segments'])}; " \
+            f"maybe you should add resample_segments in pre_transform."
+
+        image, image2 = sample['img'], sample2['img']
+        r = np.random.beta(alpha, beta)  # mixup ratio, alpha=beta=8.0
+        image = (image * r + image2 * (1 - r)).astype(np.uint8)
+
+        sample['img'] = image
+        sample['cls'] = np.concatenate((sample['cls'], sample2['cls']), 0)
+        sample['bboxes'] = np.concatenate((sample['bboxes'], sample2['bboxes']), 0)
+        sample['segments'] = np.concatenate((sample['segments'], sample2['segments']), 0)
+        return sample
+
+    def pastein(self, sample, num_sample=30):
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert not self.return_segments, "pastein currently does not support seg data."
+        assert not self.return_keypoints, "pastein currently does not support keypoint data."
+        sample.pop('segments', None)
+        sample.pop('keypoints', None)
+
+        image = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        # load sample
+        sample_labels, sample_images, sample_masks = [], [], []
+        while len(sample_labels) < num_sample:
+            sample_labels_, sample_images_, sample_masks_ = self._pastin_load_samples()
+            sample_labels += sample_labels_
+            sample_images += sample_images_
+            sample_masks += sample_masks_
+            if len(sample_labels) == 0:
+                break
+
+        # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
+        h, w = image.shape[:2]
+
+        # create random masks
+        scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6  # image size fraction
+        for s in scales:
+            if random.random() < 0.2:
+                continue
+            mask_h = random.randint(1, int(h * s))
+            mask_w = random.randint(1, int(w * s))
+
+            # box
+            xmin = max(0, random.randint(0, w) - mask_w // 2)
+            ymin = max(0, random.randint(0, h) - mask_h // 2)
+            xmax = min(w, xmin + mask_w)
+            ymax = min(h, ymin + mask_h)
+
+            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+            if len(bboxes):
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+            else:
+                ioa = np.zeros(1)
+
+            if (
+                    (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin + 20) and (ymax > ymin + 20)
+            ):  # allow 30% obscuration of existing labels
+                sel_ind = random.randint(0, len(sample_labels) - 1)
+                hs, ws, cs = sample_images[sel_ind].shape
+                r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws)
+                r_w = int(ws * r_scale)
+                r_h = int(hs * r_scale)
+
+                if (r_w > 10) and (r_h > 10):
+                    r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))
+                    r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))
+                    temp_crop = image[ymin: ymin + r_h, xmin: xmin + r_w]
+                    m_ind = r_mask > 0
+                    if m_ind.astype(np.int_).sum() > 60:
+                        temp_crop[m_ind] = r_image[m_ind]
+                        box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], dtype=np.float32)
+                        if len(bboxes):
+                            cls = np.concatenate((cls, [[sample_labels[sel_ind]]]), 0)
+                            bboxes = np.concatenate((bboxes, [box]), 0)
+                        else:
+                            cls = np.array([[sample_labels[sel_ind]]])
+                            bboxes = np.array([box])
+
+                        image[ymin: ymin + r_h, xmin: xmin + r_w] = temp_crop  # Modify on the original image
+
+        sample['img'] = image
+        sample['bboxes'] = bboxes
+        sample['cls'] = cls
+        return sample
+
+    def _pastin_load_samples(self):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = []
+        indices = random.choices(self.indices, k=4)  # 4 image indices for the 4-tile mosaic
+        mosaic_samples += [self.get_sample(i) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, sample in enumerate(mosaic_samples):
+            # Load image
+            img = sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # Labels
+            cls, bboxes = sample['cls'], sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            segments = sample['segments']
+            segments_is_list = isinstance(segments, list)
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        # Concat/clip labels
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        # Augment
+        sample_labels, sample_images, sample_masks = \
+            self._pastin_sample_segments(img4, classes4, bboxes4, segments4, probability=0.5)
+
+        return sample_labels, sample_images, sample_masks
+
+    def _pastin_sample_segments(self, img, classes, bboxes, segments, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        n = len(segments)
+        sample_labels = []
+        sample_images = []
+        sample_masks = []
+        if probability and n:
+            h, w, c = img.shape  # height, width, channels
+            for j in random.sample(range(n), k=round(probability * n)):
+                cls, l, s = classes[j], bboxes[j], segments[j]
+                box = (
+                    l[0].astype(int).clip(0, w - 1),
+                    l[1].astype(int).clip(0, h - 1),
+                    l[2].astype(int).clip(0, w - 1),
+                    l[3].astype(int).clip(0, h - 1),
+                )
+
+                if (box[2] <= box[0]) or (box[3] <= box[1]):
+                    continue
+
+                sample_labels.append(cls[0])
+
+                mask = np.zeros(img.shape, np.uint8)
+
+                cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+                sample_masks.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+                result = cv2.bitwise_and(src1=img, src2=mask)
+                i = result > 0  # pixels to replace
+                mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+                sample_images.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+        return sample_labels, sample_images, sample_masks
+
+    def hsv_augment(self, sample, hgain=0.5, sgain=0.5, vgain=0.5):
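+        # Apply random HSV color jitter: hue/saturation/value each get an independent random
+        # gain in [1 - gain, 1 + gain], realized below through per-channel lookup tables.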
+        image = sample['img']
+        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
+        hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
+        dtype = image.dtype  # uint8
+
+        x = np.arange(0, 256, dtype=np.int16)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
+        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)  # Modify on the original image
+
+        sample['img'] = image
+        return sample
+
+    def fliplr(self, sample):
+        # flip image left-right
+        image = sample['img']
+        image = np.fliplr(image)
+        sample['img'] = image
+
+        # flip box
+        _, w = image.shape[:2]
+        bboxes, bbox_format = sample['bboxes'], sample['bbox_format']
+        if bbox_format == "ltrb":
+            if len(bboxes):
+                x1 = bboxes[:, 0].copy()
+                x2 = bboxes[:, 2].copy()
+                bboxes[:, 0] = w - x2
+                bboxes[:, 2] = w - x1
+        elif bbox_format == "xywhn":
+            if len(bboxes):
+                bboxes[:, 0] = 1 - bboxes[:, 0]
+        else:
+            raise NotImplementedError
+        sample['bboxes'] = bboxes
+
+        # flip seg
+        if self.return_segments:
+            segment_format, segments = sample['segment_format'], sample['segments']
+            assert segment_format == 'polygon', \
+                f'FlipLR: The segment format should be polygon, but got {segment_format}'
+            assert isinstance(segments, np.ndarray), \
+                f"FlipLR: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                f"maybe you should resample_segments before that."
+
+            if len(segments):
+                segments[..., 0] = w - segments[..., 0]
+
+            sample['segments'] = segments
+
+        return sample
+
+    def letterbox(self, sample, new_shape=None, xywhn2xyxy_=True, scaleup=False, only_image=False, color=(114, 114, 114)):
+        # Resize and pad image while meeting stride-multiple constraints
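+        # The resize ratio w.r.t. the original image and the padding added on each side are
+        # recorded in sample['hw_scale'] and sample['hw_pad'] for later coordinate restoration.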
+        if sample['bbox_format'] == 'ltrb':
+            xywhn2xyxy_ = False
+
+        if not new_shape:
+            new_shape = self.img_size
+
+        if isinstance(new_shape, int):
+            new_shape = (new_shape, new_shape)
+
+        image = sample['img']
+        shape = image.shape[:2]  # current shape [height, width]
+
+        h, w = shape[:]
+        ori_shape = sample['ori_shape']
+        h0, w0 = ori_shape
+        hw_scale = np.array([h / h0, w / w0])
+        sample['hw_scale'] = hw_scale
+
+        # Scale ratio (new / old)
+        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+        if not scaleup:  # only scale down, do not scale up (for better test mAP)
+            r = min(r, 1.0)
+
+        # Compute padding
+        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+        dw /= 2  # divide padding into 2 sides
+        dh /= 2
+        hw_pad = np.array([dh, dw])
+
+        if shape != new_shape:
+            if shape[::-1] != new_unpad:  # resize
+                image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
+            top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+            image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+            sample['hw_pad'] = hw_pad
+        else:
+            sample['hw_pad'] = np.array([0., 0.])
+        bboxes = sample['bboxes']
+        if not only_image:
+            # convert bboxes
+            if len(bboxes):
+                if xywhn2xyxy_:
+                    bboxes = xywhn2xyxy(bboxes, r * w, r * h, padw=dw, padh=dh)
+                else:
+                    bboxes *= r
+                    bboxes[:, [0, 2]] += dw
+                    bboxes[:, [1, 3]] += dh
+                sample['bboxes'] = bboxes
+            sample['bbox_format'] = 'ltrb'
+
+            # convert segments
+            if 'segments' in sample:
+                segments, segment_format = sample['segments'], sample['segment_format']
+                assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+                if len(segments):
+                    if isinstance(segments, np.ndarray):
+                        if xywhn2xyxy_:
+                            segments[..., 0] *= w
+                            segments[..., 1] *= h
+                        else:
+                            segments *= r
+                        segments[..., 0] += dw
+                        segments[..., 1] += dh
+                    elif isinstance(segments, list):
+                        for segment in segments:
+                            if xywhn2xyxy_:
+                                segment[..., 0] *= w
+                                segment[..., 1] *= h
+                            else:
+                                segment *= r
+                            segment[..., 0] += dw
+                            segment[..., 1] += dh
+                    sample['segments'] = segments
+
+        sample['img'] = image
+        return sample
+
+    def label_norm(self, sample, xyxy2xywh_=True):
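+        # Convert boxes to xywh normalized by image width/height ('xywhn') unless already in that format.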
+        bbox_format = sample['bbox_format']
+        if bbox_format == "xywhn":
+            return sample
+
+        bboxes = sample['bboxes']
+        if len(bboxes) == 0:
+            sample['bbox_format'] = 'xywhn'
+            return sample
+
+        if xyxy2xywh_:
+            bboxes = xyxy2xywh(bboxes)  # convert xyxy to xywh
+        height, width = sample['img'].shape[:2]
+        bboxes[:, [1, 3]] /= height  # normalized height 0-1
+        bboxes[:, [0, 2]] /= width  # normalized width 0-1
+        sample['bboxes'] = bboxes
+        sample['bbox_format'] = 'xywhn'
+
+        return sample
+
+    def label_pad(self, sample, padding_size=160, padding_value=-1):
+        # create fixed label, avoid dynamic shape problem.
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        cls, bboxes = sample['cls'], sample['bboxes']
+        cls_pad = np.full((padding_size, 1), padding_value, dtype=np.float32)
+        bboxes_pad = np.full((padding_size, 4), padding_value, dtype=np.float32)
+        nL = len(bboxes)
+        if nL:
+            cls_pad[:min(nL, padding_size)] = cls[:min(nL, padding_size)]
+            bboxes_pad[:min(nL, padding_size)] = bboxes[:min(nL, padding_size)]
+        sample['cls'] = cls_pad
+        sample['bboxes'] = bboxes_pad
+
+        if "segments" in sample:
+            if sample['segment_format'] == "mask":
+                segments = sample['segments']
+                assert isinstance(segments, np.ndarray), \
+                    f"Label Pad: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                    f"maybe you should resample_segments before that."
+                assert nL == segments.shape[0], f"Label Pad: segments len not equal bboxes"
+                h, w = segments.shape[1:]
+                segments_pad = np.full((padding_size, h, w), padding_value, dtype=np.float32)
+                segments_pad[:min(nL, padding_size)] = segments[:min(nL, padding_size)]
+                sample['segments'] = segments_pad
+
+        return sample
+
+    def image_norm(self, sample, scale=255.0):
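+        # Scale pixel values from [0, 255] to [0, 1] with the default scale of 255.0.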
+        image = sample['img']
+        image = image.astype(np.float32, copy=False)
+        image /= scale
+        sample['img'] = image
+        return sample
+
+    def image_transpose(self, sample, bgr2rgb=True, hwc2chw=True):
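+        # Convert channel order BGR -> RGB and layout HWC -> CHW, as expected by the network input.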
+        image = sample['img']
+        if bgr2rgb:
+            image = image[:, :, ::-1]
+        if hwc2chw:
+            image = image.transpose(2, 0, 1)
+        sample['img'] = image
+        return sample
+
+    def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+        """convert polygon points to bitmap."""
+        segments, segment_format = sample['segments'], sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert isinstance(segments, np.ndarray), \
+            f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+            f"maybe you should resample_segments before that."
+
+        h, w = sample['img'].shape[:2]
+        if mask_overlap:
+            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+            sample['cls'] = sample['cls'][sorted_idx]
+            sample['bboxes'] = sample['bboxes'][sorted_idx]
+            sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+            sample['segment_format'] = 'overlap'
+        else:
+            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+            sample['segments'] = masks
+            sample['segment_format'] = 'mask'
+
+        return sample
+
+    def _img2label_paths(self, img_paths):
+        # Define label paths as a function of image paths
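+        # e.g. '/data/coco/images/train2017/000001.jpg' -> '/data/coco/labels/train2017/000001.txt'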
+        sa, sb = os.sep + "images" + os.sep, os.sep + "labels" + os.sep  # /images/, /labels/ substrings
+        return ["txt".join(x.replace(sa, sb, 1).rsplit(x.split(".")[-1], 1)) for x in img_paths]
+
+    def _get_hash(self, paths):
+        # Returns a single hash value of a list of paths (files or dirs)
+        size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+        h = hashlib.md5(str(size).encode())  # hash sizes
+        h.update("".join(paths).encode())  # hash paths
+        return h.hexdigest()  # return hash
+
+    def _exif_size(self, img):
+        # Returns exif-corrected PIL size
+        s = img.size  # (width, height)
+        try:
+            rotation = dict(img._getexif().items())[orientation]
+            if rotation == 6:  # rotation 270
+                s = (s[1], s[0])
+            elif rotation == 8:  # rotation 90
+                s = (s[1], s[0])
+        except:
+            pass
+
+        return s
+
+    def train_collate_fn(self, batch_samples, batch_info):
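+        # Stack images into a batch and build one label array per image of the form
+        # [batch_index, class, *box], so each target can be matched back to its image.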
+        imgs = [sample.pop('img') for sample in batch_samples]
+        labels = []
+        for i, sample in enumerate(batch_samples):
+            cls, bboxes = sample.pop('cls'), sample.pop('bboxes')
+            labels.append(np.concatenate((np.full_like(cls, i), cls, bboxes), axis=-1))
+        return_items = [np.stack(imgs, 0), np.stack(labels, 0)]
+
+        if self.return_segments:
+            masks = [sample.pop('segments', None) for sample in batch_samples]
+            return_items.append(np.stack(masks, 0))
+        if self.return_keypoints:
+            keypoints = [sample.pop('keypoints', None) for sample in batch_samples]
+            return_items.append(np.stack(keypoints, 0))
+
+        return tuple(return_items)
+
+    def test_collate_fn(self, batch_samples, batch_info):
+        imgs = [sample.pop('img') for sample in batch_samples]
+        path = [sample.pop('im_file') for sample in batch_samples]
+        hw_ori = [sample.pop('ori_shape') for sample in batch_samples]
+        hw_scale = [sample.pop('hw_scale') for sample in batch_samples]
+        pad = [sample.pop('hw_pad') for sample in batch_samples]
+        return (
+            np.stack(imgs, 0),
+            path,
+            np.stack(hw_ori, 0),
+            np.stack(hw_scale, 0),
+            np.stack(pad, 0),
+        )
+
+

mindyolo.data.dataset.COCODataset.get_sample(index)

Get and return label information from the dataset.

Source code in mindyolo/data/dataset.py, lines 321-339
def get_sample(self, index):
+    """Get and return label information from the dataset."""
+    sample = deepcopy(self.labels[index])
+    if self.imgs is None:
+        path = self.img_files[index]
+        img = cv2.imread(path)  # BGR
+        assert img is not None, "Image Not Found " + path
+        h_ori, w_ori = img.shape[:2]  # orig hw
+        r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+        if r != 1:  # always resize down, only resize up if training with augmentation
+            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+            img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+        sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+    else:
+        sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+    return sample

mindyolo.data.dataset.COCODataset.segment_poly2mask(sample, mask_overlap, mask_ratio)

Convert polygon points to bitmap.

Source code in mindyolo/data/dataset.py, lines 1111-1131
def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+    """convert polygon points to bitmap."""
+    segments, segment_format = sample['segments'], sample['segment_format']
+    assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+    assert isinstance(segments, np.ndarray), \
+        f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+        f"maybe you should resample_segments before that."
+
+    h, w = sample['img'].shape[:2]
+    if mask_overlap:
+        masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+        sample['cls'] = sample['cls'][sorted_idx]
+        sample['bboxes'] = sample['bboxes'][sorted_idx]
+        sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+        sample['segment_format'] = 'overlap'
+    else:
+        masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+        sample['segments'] = masks
+        sample['segment_format'] = 'mask'
+
+    return sample

Albumentations


mindyolo.data.albumentations.Albumentations

Source code in mindyolo/data/albumentations.py, lines 10-62
class Albumentations:
+    # Implement Albumentations augmentation https://github.com/ultralytics/yolov5
+    # YOLOv5 Albumentations class (optional, only used if package is installed)
+    def __init__(self, size=640, random_resized_crop=True, **kwargs):
+        self.transform = None
+        prefix = _colorstr("albumentations: ")
+        try:
+            import albumentations as A
+
+            _check_version(A.__version__, "1.0.3", hard=True)  # version requirement
+            T = []
+            if random_resized_crop:
+                T.extend([
+                    A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
+                ])
+            T.extend([
+                A.Blur(p=0.01),
+                A.MedianBlur(p=0.01),
+                A.ToGray(p=0.01),
+                A.CLAHE(p=0.01),
+                A.RandomBrightnessContrast(p=0.0),
+                A.RandomGamma(p=0.0),
+                A.ImageCompression(quality_lower=75, p=0.0),
+            ])
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
+
+            print(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p), flush=True)
+            print("[INFO] albumentations load success", flush=True)
+        except ImportError:  # package not installed, skip
+            pass
+            print("[WARNING] package not installed, albumentations load failed", flush=True)
+        except Exception as e:
+            print(f"{prefix}{e}", flush=True)
+            print("[WARNING] albumentations load failed", flush=True)
+
+    def __call__(self, sample, p=1.0, **kwargs):
+        if self.transform and random.random() < p:
+            im, bboxes, cls, bbox_format = sample['img'], sample['bboxes'], sample['cls'], sample['bbox_format']
+            assert bbox_format in ("ltrb", "xywhn")
+            if bbox_format == "ltrb" and bboxes.shape[0] > 0:
+                h, w = im.shape[:2]
+                bboxes = xyxy2xywh(bboxes)
+                bboxes[:, [0, 2]] /= w
+                bboxes[:, [1, 3]] /= h
+
+            new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
+
+            sample['img'] = new['image']
+            sample['bboxes'] = np.array(new['bboxes'])
+            sample['cls'] = np.array(new['class_labels'])
+            sample['bbox_format'] = "xywhn"
+
+        return sample
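
A minimal usage sketch (assuming the albumentations package is installed; the dummy image and box values below are purely illustrative):

import numpy as np
from mindyolo.data.albumentations import Albumentations

# Build the transform once; it silently becomes a no-op if albumentations is not installed.
albu = Albumentations(size=640, random_resized_crop=False)

# A dummy sample in the dict format expected by __call__ (normalized xywh boxes plus class ids).
sample = {
    "img": np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8),
    "bboxes": np.array([[0.5, 0.5, 0.2, 0.3]], dtype=np.float32),  # xywhn
    "cls": np.array([[0]], dtype=np.float32),
    "bbox_format": "xywhn",
}
sample = albu(sample, p=1.0)
print(sample["img"].shape, sample["bboxes"].shape)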
\ No newline at end of file
diff --git a/reference/models/index.html b/reference/models/index.html
new file mode 100644
index 00000000..2365ebd2
--- /dev/null
+++ b/reference/models/index.html
@@ -0,0 +1,1709 @@

Models - MindYOLO Docs

Models

+

Create Model

mindyolo.models.model_factory.create_model(model_name, model_cfg=None, in_channels=3, num_classes=80, checkpoint_path='', **kwargs)

Source code in mindyolo/models/model_factory.py, lines 16-41
def create_model(
+    model_name: str,
+    model_cfg: dict = None,
+    in_channels: int = 3,
+    num_classes: int = 80,
+    checkpoint_path: str = "",
+    **kwargs,
+):
+    model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith(
+            ".ckpt"
+        ), f"[{checkpoint_path}] not a ckpt file."
+        checkpoint_param = load_checkpoint(checkpoint_path)
+        load_param_into_net(model, checkpoint_param)
+        logger.info(f"Load checkpoint from [{checkpoint_path}] success.")
+
+    return model
+

YOLOV3

mindyolo.models.yolov3(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov3 model.

Source code in mindyolo/models/yolov3.py, lines 44-48
@register_model
+def yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3:
+    """Get yolov3 model."""
+    model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model

YOLOV4

mindyolo.models.yolov4(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov4 model.

Source code in mindyolo/models/yolov4.py, lines 33-37
@register_model
+def yolov4(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv4:
+    """Get yolov4 model."""
+    model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model

YOLOV5

mindyolo.models.yolov5(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov5 model.

Source code in mindyolo/models/yolov5.py, lines 44-48
@register_model
+def yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5:
+    """Get yolov5 model."""
+    model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model

YOLOV7

mindyolo.models.yolov7(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov7 model.

Source code in mindyolo/models/yolov7.py, lines 46-50
@register_model
+def yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7:
+    """Get yolov7 model."""
+    model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model

YOLOV8

mindyolo.models.yolov8(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolov8 model.

Source code in mindyolo/models/yolov8.py, lines 45-49
@register_model
+def yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8:
+    """Get yolov8 model."""
+    model = YOLOv8(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model

YOLOX

mindyolo.models.yolox(cfg, in_channels=3, num_classes=None, **kwargs)

Get yolox model.

Source code in mindyolo/models/yolox.py, lines 43-47
@register_model
+def yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX:
+    """Get yolox model."""
+    model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +
\ No newline at end of file
diff --git a/search/search_index.json b/search/search_index.json
new file mode 100644
index 00000000..f8263839
--- /dev/null
+++ b/search/search_index.json
@@ -0,0 +1 @@
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"MindYOLO","text":"

MindYOLO implements state-of-the-art YOLO series algorithms based on MindSpore. The following is the corresponding mindyolo versions and supported mindspore versions.

mindyolo master -> mindspore master; mindyolo 0.4 -> mindspore 2.3.0; mindyolo 0.3 -> mindspore 2.2.10; mindyolo 0.2 -> mindspore 2.0; mindyolo 0.1 -> mindspore 1.8

"},{"location":"#benchmark-and-model-zoo","title":"Benchmark and Model Zoo","text":"

See Benchmark Results.

"},{"location":"#supported-model-list","title":"supported model list","text":"
  • YOLOv10 (welcome to contribute)
  • YOLOv9 (welcome to contribute)
  • YOLOv8
  • YOLOv7
  • YOLOX
  • YOLOv5
  • YOLOv4
  • YOLOv3
"},{"location":"#installation","title":"Installation","text":"

See INSTALLATION for details.

"},{"location":"#getting-started","title":"Getting Started","text":"

See QUICK START for details.

"},{"location":"#notes","title":"Notes","text":"

\u26a0\ufe0f The current version is based on GRAPH mode with static shapes. Dynamic shape support will be added in a later release.

"},{"location":"#how-to-contribute","title":"How to Contribute","text":"

We appreciate all contributions including issues and PRs to make MindYOLO better.

Please refer to CONTRIBUTING for the contributing guideline.

"},{"location":"#license","title":"License","text":"

MindYOLO is released under the Apache License 2.0.

"},{"location":"#acknowledgement","title":"Acknowledgement","text":"

MindYOLO is an open source project that welcomes any contribution and feedback. By providing a flexible and standardized toolkit, we hope the toolbox and benchmark can support the growing research community, help reimplement existing methods, and enable the development of new real-time object detection methods.

"},{"location":"#citation","title":"Citation","text":"

If you find this project useful in your research, please consider citing:

@misc{MindSpore Object Detection YOLO 2023,\n    title={{MindSpore Object Detection YOLO}:MindSpore Object Detection YOLO Toolbox and Benchmark},\n    author={MindSpore YOLO Contributors},\n    howpublished = {\\url{https://github.com/mindspore-lab/mindyolo}},\n    year={2023}\n}\n
"},{"location":"installation/","title":"Installation","text":""},{"location":"installation/#dependency","title":"Dependency","text":"
  • mindspore >= 2.3
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • openmpi 4.0.3 (for distributed mode)

To install the dependency, please run

pip install -r requirements.txt\n

MindSpore can be easily installed by following the official instructions, where you can select the hardware platform that best fits your setup. To run in distributed mode, openmpi is also required.

\u26a0\ufe0f The current version only supports the Ascend platform, and the GPU platform will be supported later.

"},{"location":"installation/#install-with-pypi","title":"Install with PyPI","text":"

MindYOLO is published as a Python package and can be installed with pip, ideally by using a virtual environment. Open up a terminal and install MindYOLO with:

pip install mindyolo\n
"},{"location":"installation/#install-from-source-bleeding-edge-version","title":"Install from Source (Bleeding Edge Version)","text":""},{"location":"installation/#from-vcs","title":"from VCS","text":"
pip install git+https://github.com/mindspore-lab/mindyolo.git\n
"},{"location":"installation/#from-local-src","title":"from local src","text":"

As this project is under active development, developers and contributors should prefer this installation method!

MindYOLO can be used directly from GitHub by cloning the repository into a local folder, which is useful if you want to work with the very latest version:

git clone https://github.com/mindspore-lab/mindyolo.git\n

After cloning from git, it is recommended that you install using \"editable\" mode, which can help resolve potential module import issues:

cd mindyolo\npip install -e .\n

In addition, we provide an optional fast COCO API to improve evaluation speed. The code is written in C++ and can be compiled with the following commands (this step is optional):

cd mindyolo/csrc\nsh build.sh\n

We also provide fused GPU operators built upon the MindSpore ops.Custom API, which can improve training speed. The source code, written in C++ and CUDA, is located in the folder examples/custom_gpu_op/. To enable this feature during GPU training, modify the method bbox_iou in mindyolo/models/losses/iou_loss.py by referring to the demo script examples/custom_gpu_op/iou_loss_fused.py. Before running iou_loss_fused.py, compile the C++ and CUDA source code into dynamic link libraries with the following commands (this step is optional):

bash examples/custom_gpu_op/fused_op/build.sh\n
"},{"location":"how_to_guides/callback/","title":"Usage of MindYOLO callback function","text":"

Callback function: when the program reaches a certain mount point, all methods registered to that mount point at runtime are called automatically. Callback functions increase the flexibility and extensibility of the program, because users can register custom methods to a mount point without modifying the program code.

In MindYOLO, the callback function is specifically implemented in the mindyolo/utils/callback.py file.

#mindyolo/utils/callback.py\n@CALLBACK_REGISTRY.registry_module()\nclass callback_class_name(BaseCallback):\n    def __init__(self, **kwargs):\n        super().__init__()\n...\ndef callback_fn_name(self, run_context: RunContext):\n    pass\n

To enable a callback, add a list of dictionaries under the callback field of the model's yaml file:

#Callback function configuration dictionary:\ncallback:\n- { name: callback_class_name, args: xx }\n- { name: callback_class_name2, args: xx }\n
Take YOLOX as an example:

Add logic to the on_train_step_begin method of the YoloxSwitchTrain class in mindyolo/utils/callback.py to print a \"train step begin\" log:

@CALLBACK_REGISTRY.registry_module()\nclass YoloxSwitchTrain(BaseCallback):\n\n    def on_train_step_begin(self, run_context: RunContext):\n        # Custom logic\n        logger.info(\"train step begin\")\n        pass\n
Add the callback under the callback field of the corresponding YOLOX yaml file, configs/yolox/hyp.scratch.yaml:
callback:\n- { name: YoloxSwitchTrain, switch_epoch_num: 285 }\n
The logger.info(\"train step begin\") statement will then be executed before each training step.

With the help of the callback function, users can customize the logic that needs to be executed at a certain mount point without having to understand the code of the complete training process.
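
As an illustration, a minimal custom callback added to mindyolo/utils/callback.py might look like the following sketch (the class name PrintEveryNSteps and the log_interval argument are made up for this example; it only reuses the hooks and registration pattern shown above):

@CALLBACK_REGISTRY.registry_module()\nclass PrintEveryNSteps(BaseCallback):\n    def __init__(self, log_interval=100, **kwargs):\n        super().__init__()\n        self.log_interval = log_interval\n        self.step_cnt = 0\n\n    def on_train_step_begin(self, run_context: RunContext):\n        # count steps locally and log every log_interval steps\n        self.step_cnt += 1\n        if self.step_cnt % self.log_interval == 0:\n            logger.info(f\"reached train step {self.step_cnt}\")\n

It would then be enabled in the yaml file with:

callback:\n- { name: PrintEveryNSteps, log_interval: 100 }\n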

"},{"location":"how_to_guides/data_preparation/","title":"Data preparation","text":""},{"location":"how_to_guides/data_preparation/#dataset-format-introduction","title":"Dataset format introduction","text":"

Download the coco2017 labels in YOLO format (coco2017labels-segments) and the coco2017 original images (train2017, val2017), then put the original images into the images directory of the YOLO-format dataset:

\u2514\u2500 coco2017_yolo\n    \u251c\u2500 annotations\n        \u2514\u2500 instances_val2017.json\n    \u251c\u2500 images\n        \u251c\u2500 train2017   # coco2017 original images\n        \u2514\u2500 val2017     # coco2017 original images\n    \u251c\u2500 labels\n        \u251c\u2500 train2017\n        \u2514\u2500 val2017\n    \u251c\u2500 train2017.txt\n    \u251c\u2500 val2017.txt\n    \u2514\u2500 test-dev2017.txt\n
Each line of the train.txt file corresponds to the relative path of a single image, for example:
./images/train2017/00000000.jpg\n./images/train2017/00000001.jpg\n./images/train2017/00000002.jpg\n./images/train2017/00000003.jpg\n./images/train2017/00000004.jpg\n./images/train2017/00000005.jpg\n
The txt files in the labels/train2017 folder contain the annotation information of the corresponding images and support both detect and segment formats.

Detect format: each row usually has 5 columns, corresponding to the category id followed by the normalized center coordinates (x, y) and the normalized width and height (w, h) of the annotation box:

62 0.417040 0.206280 0.403600 0.412560\n62 0.818810 0.197933 0.174740 0.189680\n39 0.684540 0.277773 0.086240 0.358960\n0 0.620220 0.725853 0.751680 0.525840\n63 0.197190 0.364053 0.394380 0.669653\n39 0.932330 0.226240 0.034820 0.076640\n
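For illustration, a minimal sketch of reading such a detect-format label file with numpy (the file path is an example, and this assumes a pure detect-format file with a fixed 5 columns per row):

import numpy as np\n\nlabels = np.loadtxt(\"labels/train2017/000000000009.txt\", ndmin=2)  # one row per object\ncls_ids = labels[:, 0].astype(int)      # category ids\nboxes_xywhn = labels[:, 1:5]            # normalized x_center, y_center, width, height\nprint(cls_ids, boxes_xywhn)\n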
Segment format: the first value in each line is the category id, followed by pairs of normalized polygon coordinates (x, y):

45 0.782016 0.986521 0.937078 0.874167 0.957297 0.782021 0.950562 0.739333 0.825844 0.561792 0.714609 0.420229 0.657297 0.391021 0.608422 0.4 0.0303438 0.750562 0.0016875 0.811229 0.003375 0.889896 0.0320156 0.986521\n45 0.557859 0.143813 0.487078 0.0314583 0.859547 0.00897917 0.985953 0.130333 0.984266 0.184271 0.930344 0.386521 0.80225 0.480896 0.763484 0.485396 0.684266 0.39775 0.670781 0.3955 0.679219 0.310104 0.642141 0.253937 0.561234 0.155063 0.559547 0.137083\n50 0.39 0.727063 0.418234 0.649417 0.455297 0.614125 0.476469 0.614125 0.51 0.590583 0.54 0.569417 0.575297 0.562354 0.601766 0.56 0.607062 0.536479 0.614125 0.522354 0.637063 0.501167 0.665297 0.48 0.69 0.477646 0.698828 0.494125 0.698828 0.534125 0.712938 0.529417 0.742938 0.548229 0.760594 0.564708 0.774703 0.550583 0.778234 0.536479 0.781766 0.531771 0.792359 0.541167 0.802937 0.555292 0.802937 0.569417 0.802937 0.576479 0.822359 0.576479 0.822359 0.597646 0.811766 0.607062 0.811766 0.618833 0.818828 0.637646 0.820594 0.656479 0.827641 0.687063 0.827641 0.703521 0.829406 0.727063 0.838234 0.708229 0.852359 0.729417 0.868234 0.750583 0.871766 0.792938 0.877063 0.821167 0.884125 0.861167 0.817062 0.92 0.734125 0.976479 0.711172 0.988229 0.48 0.988229 0.494125 0.967063 0.517062 0.912937 0.508234 0.832937 0.485297 0.788229 0.471172 0.774125 0.395297 0.729417\n45 0.375219 0.0678333 0.375219 0.0590833 0.386828 0.0503542 0.424156 0.0315208 0.440797 0.0281458 0.464 0.0389167 0.525531 0.115583 0.611797 0.222521 0.676359 0.306583 0.678875 0.317354 0.677359 0.385271 0.66475 0.394687 0.588594 0.407458 0.417094 0.517771 0.280906 0.604521 0.0806562 0.722208 0.0256719 0.763917 0.00296875 0.809646 0 0.786104 0 0.745083 0 0.612583 0.03525 0.613271 0.0877187 0.626708 0.130594 0.626708 0.170437 0.6025 0.273844 0.548708 0.338906 0.507 0.509906 0.4115 0.604734 0.359042 0.596156 0.338188 0.595141 0.306583 0.595141 0.291792 0.579516 0.213104 0.516969 0.129042 0.498297 0.100792 0.466516 0.0987708 0.448875 0.0786042 0.405484 0.0705208 0.375219 0.0678333 0.28675 0.108375 0.282719 0.123167 0.267078 0.162854 0.266062 0.189083 0.245391 0.199833 0.203516 0.251625 0.187375 0.269771 0.159641 0.240188 0.101125 0.249604 0 0.287271 0 0.250271 0 0.245563 0.0975938 0.202521 0.203516 0.145354 0.251953 0.123167 0.28675 0.108375\n49 0.587812 0.128229 0.612281 0.0965625 0.663391 0.0840833 0.690031 0.0908125 0.700109 0.10425 0.705859 0.133042 0.700109 0.143604 0.686422 0.146479 0.664828 0.153188 0.644672 0.157042 0.629563 0.175271 0.605797 0.181021 0.595 0.147437\n49 0.7405 0.178417 0.733719 0.173896 0.727781 0.162583 0.729484 0.150167 0.738812 0.124146 0.747281 0.0981458 0.776109 0.0811875 0.804094 0.0845833 0.814266 0.102667 0.818516 0.115104 0.812578 0.133208 0.782906 0.151292 0.754063 0.172771\n49 0.602656 0.178854 0.636125 0.167875 0.655172 0.165125 0.6665 0.162375 0.680391 0.155521 0.691719 0.153458 0.703047 0.154146 0.713859 0.162375 0.724156 0.174729 0.730844 0.193271 0.733422 0.217979 0.733938 0.244063 0.733422 0.281813 0.732391 0.295542 0.728266 0.300354 0.702016 0.294854 0.682969 0.28525 0.672156 0.270146\n49 0.716891 0.0519583 0.683766 0.0103958 0.611688 0.0051875 0.568828 0.116875 0.590266 0.15325 0.590266 0.116875 0.613641 0.0857083 0.631172 0.0857083 0.6565 0.083125 0.679875 0.0883125 0.691563 0.0961042 0.711031 0.0649375\n
instances_val2017.json contains the validation-set annotations in COCO format, so the COCO API can be called directly for mAP calculation.

During training and inference, you need to modify train_set, val_set and test_set in configs/coco.yaml to the actual data paths.

For a working example of fine-tuning MindYOLO on a custom dataset, please refer to Finetune.

"},{"location":"how_to_guides/write_a_new_model/","title":"Model Writing Guide","text":"

This document provides a tutorial for writing custom models for MindYOLO. It is divided into three parts:

  • Model definition: We can define a network directly or use a yaml file to define a network.
  • Register model: optional. After registration, the custom model can be created by name through the create_model interface
  • Verification: verify that the model can run a forward pass
"},{"location":"how_to_guides/write_a_new_model/#model-definition","title":"Model definition","text":""},{"location":"how_to_guides/write_a_new_model/#1-use-python-code-directly-to-write-the-network","title":"1. Use python code directly to write the network","text":""},{"location":"how_to_guides/write_a_new_model/#module-import","title":"Module import","text":"

Import the nn module and ops module in the MindSpore framework to define the components and operations of the neural network.

import mindspore.nn as nn\nimport mindspore.ops.operations as ops\n

"},{"location":"how_to_guides/write_a_new_model/#create-a-model","title":"Create a model","text":"

Define a model class MyModel that inherits from nn.Cell. In the constructor __init__, define the various components of the model:

class MyModel(nn.Cell):\n    def __init__(self):\n        super(MyModel, self).__init__()\n        # conv1 is a 2D convolution layer: 3 input channels, 16 output channels, 3x3 kernel, stride 1, padding 1.\n        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)\n        # relu is a ReLU activation operation.\n        self.relu = ops.ReLU()\n        # maxpool is a 2D max-pooling layer with a 2x2 window and stride 2.\n        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)\n        # conv2 is another 2D convolution layer: 16 input channels, 32 output channels, 3x3 kernel, stride 1, padding 1.\n        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)\n        # fc is a fully-connected layer with input feature dimension 32*8*8 and output feature dimension 10.\n        self.fc = nn.Dense(32 * 8 * 8, 10)\n\n    # The construct method defines the forward pass: the input x goes through convolution, activation and pooling,\n    # is flattened into a 1-D vector, and then passes through the fully-connected layer to produce the final output.\n    def construct(self, x):\n        x = self.conv1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.conv2(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = x.view(x.shape[0], -1)\n        x = self.fc(x)\n        return x\n
"},{"location":"how_to_guides/write_a_new_model/#create-a-model-instance","title":"Create a model instance","text":"

Instantiate the MyModel class to create a model instance, model, which can later be used for training and inference.

model = MyModel()\n

"},{"location":"how_to_guides/write_a_new_model/#2-use-yaml-file-to-write-network","title":"2. Use yaml file to write network","text":"

Usually need the following three steps:

  • Create a new mymodel.yaml file
  • Create a corresponding mymodel.py file
  • Import the model in the mindyolo/models/__init__.py file

Here is a detailed guide to writing the mymodel.yaml file, taking a simple network as an example. Write the necessary parameters in yaml format and then use them in the mymodel.py file. The network part describes the model as [[from, number, module, args], ...], where each element represents the configuration of one network layer.

# The yaml in __BASE__ indicates the base configuration file for inheritance. Repeated parameters will be overwritten by the current file;\n__BASE__:\n-'../coco.yaml'\n-'./hyp.scratch-high.yaml'\n\nper_batch_size: 32\nimg_size: 640\nsync_bn: False\n\nnetwork:\nmodel_name: mymodel\ndepth_multiple: 1.0 # model depth multiple\nwidth_multiple: 1.0 # layer channel multiple\nstride: [ 8, 16, 32 ]\n\n# Configuration of the backbone network. The meaning of each layer is\n# [from, number, module, args]\n# Take the first layer as an example, [-1, 1, ConvNormAct, [32, 3, 1]], which means the input comes from `-1` (the previous layer), the number of repetitions is 1, and the module name is ConvNormAct, module input parameters are [32, 3, 1];\nbackbone:\n[[-1, 1, ConvNormAct, [32, 3, 1]], # 0\n[-1, 1, ConvNormAct, [64, 3, 2]], # 1-P1/2\n[-1, 1, Bottleneck, [64]],\n[-1, 1, ConvNormAct, [128, 3, 2]], # 3-P2/4\n[-1, 2, Bottleneck, [128]],\n[-1, 1, ConvNormAct, [256, 3, 2]], # 5-P3/8\n[-1, 8, Bottleneck, [256]],\n]\n\n#head part configuration\nhead:\n[\n[ -1, 1, ConvNormAct, [ 512, 3, 2 ] ], # 7-P4/16\n[ -1, 8, Bottleneck, [ 512 ] ],\n[ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ], # 9-P5/32\n[ -1, 4, Bottleneck, [ 1024 ] ], # 10\n]\n

Write mymodel.py file:

"},{"location":"how_to_guides/write_a_new_model/#module-import_1","title":"Module import","text":"

It is necessary to import modules in the package. For example, from .registry import register_model, etc.

import numpy as np\n\nimport mindspore as ms\nfrom mindspore import Tensor, nn\n\nfrom .initializer import initialize_defult #Used to initialize the default parameters of the model, including weight initialization method, BN layer parameters, etc.\nfrom .model_factory import build_model_from_cfg #Used to build a target detection model according to the parameters in the YAML configuration file and return an instance of the model.\nfrom .registry import register_model #Used to register a custom model in Mindyolo for use in the YAML configuration file.\n\n#Visibility declaration\n__all__ = [\"MYmodel\", \"mymodel\"]\n
"},{"location":"how_to_guides/write_a_new_model/#create-a-configuration-dictionary","title":"Create a configuration dictionary","text":"

The _cfg function is an auxiliary function used to create a configuration dictionary. It accepts a url parameter and other keyword parameters and returns a dictionary containing the url and other parameters. default_cfgs is a dictionary used to store default configurations. Here, mymodel is used as the key to create a configuration dictionary using the _cfg function.

def _cfg(url=\"\", **kwargs):\n    return {\"url\": url, **kwargs}\n\ndefault_cfgs = {\"mymodel\": _cfg(url=\"\")}\n

"},{"location":"how_to_guides/write_a_new_model/#create-a-model_1","title":"Create a model","text":"

In MindSpore, the model class inherits from nn.Cell. Generally, the following two functions need to be overloaded:

  • In the __init__ function, the module layer needed in the model should be defined.
  • In the construct function, define the model forward logic.
class MYmodel(nn.Cell):\n\n    def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False):\n        super(MYmodel, self).__init__()\n        self.cfg = cfg\n        self.stride = Tensor(np.array(cfg.stride), ms.int32)\n        self.stride_max = int(max(self.cfg.stride))\n        ch, nc = in_channels, num_classes\n\n        self.nc = nc  # override yaml value\n        self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn)\n        self.names = [str(i) for i in range(nc)]  # default names\n\n        initialize_defult()  # \u53ef\u9009\uff0c\u4f60\u53ef\u80fd\u9700\u8981initialize_defult\u65b9\u6cd5\u4ee5\u83b7\u5f97\u548cpytorch\u4e00\u6837\u7684conv2d\u3001dense\u5c42\u7684\u521d\u59cb\u5316\u65b9\u5f0f\uff1b\n\n    def construct(self, x):\n        return self.model(x)\n
"},{"location":"how_to_guides/write_a_new_model/#register-model-optional","title":"Register model (optional)","text":"

If you need to use the mindyolo interface to initialize a custom model, you need to first register and import the model

Model registration

@register_model #The registered model can be accessed by the create_model interface as a model name;\ndef mymodel(cfg, in_channels=3, num_classes=None, **kwargs) -> MYmodel:\n    \"\"\"Get MYmodel model.\"\"\"\n    model = MYmodel(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
Model import

#Add the following code to the mindyolo/models/__init__.py file\n\nfrom . import mymodel #mymodel.py files are usually placed in the mindyolo/models/ directory\n__all__.extend(mymodel.__all__)\nfrom .mymodel import *\n
"},{"location":"how_to_guides/write_a_new_model/#verify-main","title":"Verify main","text":"

The initial writing phase should ensure that the model is runnable. Basic verification can be performed through the following code block: First import the required modules and functions. Then, parse the configuration object.

if __name__ == \"__main__\":\n    from mindyolo.models.model_factory import create_model\n    from mindyolo.utils.config import parse_config\n\n    opt = parse_config()\n
Create a model and specify the related parameters. Note: if you want to create a custom model by name through create_model, you need to register it with @register_model first. Please refer to the Register model (optional) section above.
    model = create_model(\n        model_name=\"mymodel\",\n        model_cfg=opt.net,\n        num_classes=opt.data.nc,\n        sync_bn=opt.sync_bn if hasattr(opt, \"sync_bn\") else False,\n    )\n

Otherwise, please use import to introduce the model

    from mindyolo.models.mymodel import MYmodel\n    model = MYmodel(\n        model_name=\"mymodel\",\n        model_cfg=opt.net,\n        num_classes=opt.data.nc,\n        sync_bn=opt.sync_bn if hasattr(opt, \"sync_bn\") else False,\n    ) \n
Finally, create an input tensor x and pass it to the model for forward computation.
    x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)\n    out = model(x)\n    out = out[0] if isinstance(out, (list, tuple)) else out\n    print(f\"Output shape is {[o.shape for o in out]}\")\n

"},{"location":"modelzoo/benchmark/","title":"Benchmark","text":""},{"location":"modelzoo/benchmark/#detection","title":"Detection","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.2 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.6 11.2M yaml weights YOLOv8 M 16 * 8 640 MS COCO 2017 50.5 25.9M yaml weights YOLOv8 L 16 * 8 640 MS COCO 2017 52.8 43.7M yaml weights YOLOv8 X 16 * 8 640 MS COCO 2017 53.7 68.2M yaml weights YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 6.2M yaml weights YOLOv7 L 16 * 8 640 MS COCO 2017 50.8 36.9M yaml weights YOLOv7 X 12 * 8 640 MS COCO 2017 52.4 71.3M yaml weights YOLOv5 N 32 * 8 640 MS COCO 2017 27.3 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 7.2M yaml weights YOLOv5 M 32 * 8 640 MS COCO 2017 44.9 21.2M yaml weights YOLOv5 L 32 * 8 640 MS COCO 2017 48.5 46.5M yaml weights YOLOv5 X 16 * 8 640 MS COCO 2017 50.5 86.7M yaml weights YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 45.4 27.6M yaml weights YOLOv4 CSPDarknet53(silu) 16 * 8 608 MS COCO 2017 45.8 27.6M yaml weights YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 45.5 61.9M yaml weights YOLOX N 8 * 8 416 MS COCO 2017 24.1 0.9M yaml weights YOLOX Tiny 8 * 8 416 MS COCO 2017 33.3 5.1M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 40.7 9.0M yaml weights YOLOX M 8 * 8 640 MS COCO 2017 46.7 25.3M yaml weights YOLOX L 8 * 8 640 MS COCO 2017 49.2 54.2M yaml weights YOLOX X 8 * 8 640 MS COCO 2017 51.6 99.1M yaml weights YOLOX Darknet53 8 * 8 640 MS COCO 2017 47.7 63.7M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.3 373.55 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.7 365.53 11.2M yaml weights YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 496.21 6.2M yaml weights YOLOv5 N 32 * 8 640 MS COCO 2017 27.4 736.08 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 787.34 7.2M yaml weights YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 46.1 337.25 27.6M yaml weights YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 46.6 396.60 61.9M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 41.0 242.15 9.0M yaml weights"},{"location":"modelzoo/benchmark/#segmentation","title":"Segmentation","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Mask mAP (%) Params Recipe Download YOLOv8-seg X 16 * 8 640 MS COCO 2017 52.5 42.9 71.8M yaml weights"},{"location":"modelzoo/benchmark/#deploy-inference","title":"Deploy inference","text":"
  • See deployment
"},{"location":"modelzoo/benchmark/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
"},{"location":"modelzoo/yolov3/","title":"YOLOv3","text":"

YOLOv3: An Incremental Improvement

"},{"location":"modelzoo/yolov3/#abstract","title":"Abstract","text":"

We present some updates to YOLO! We made a bunch of little design changes to make it better. We also trained this new network that's pretty swell. It's a little bigger than last time but more accurate. It's still fast though, don't worry. At 320x320 YOLOv3 runs in 22 ms at 28.2 mAP, as accurate as SSD but three times faster. When we look at the old .5 IOU mAP detection metric YOLOv3 is quite good. It achieves 57.9 mAP@50 in 51 ms on a Titan X, compared to 57.5 mAP@50 in 198 ms by RetinaNet, similar performance but 3.8x faster.

"},{"location":"modelzoo/yolov3/#results","title":"Results","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 45.5 61.9M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 46.6 396.60 61.9M yaml weights"},{"location":"modelzoo/yolov3/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
  • We referred to a commonly used third-party YOLOv3 implementation.
"},{"location":"modelzoo/yolov3/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolov3/#training","title":"Training","text":""},{"location":"modelzoo/yolov3/#-pretraining-model","title":"- Pretraining Model","text":"

You can get the pre-training model from here.

To convert it to a ckpt file loadable by mindyolo, put it in the root directory and then run:

python mindyolo/utils/convert_weight_darknet53.py\n

"},{"location":"modelzoo/yolov3/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

"},{"location":"modelzoo/yolov3/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

# standalone training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend\n
"},{"location":"modelzoo/yolov3/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolov3/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolov3/#references","title":"References","text":"

[1] Jocher Glenn. YOLOv3 release v9.1. https://github.com/ultralytics/yolov3/releases/tag/v9.1, 2021. [2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018.

"},{"location":"modelzoo/yolov4/","title":"YOLOv4","text":"

YOLOv4: Optimal Speed and Accuracy of Object Detection

"},{"location":"modelzoo/yolov4/#abstract","title":"Abstract","text":"

There are a huge number of features which are said to improve Convolutional Neural Network (CNN) accuracy. Practical testing of combinations of such features on large datasets, and theoretical justification of the result, is required. Some features operate on certain models exclusively and for certain problems exclusively, or only for small-scale datasets; while some features, such as batch-normalization and residual-connections, are applicable to the majority of models, tasks, and datasets. We assume that such universal features include Weighted-Residual-Connections (WRC), Cross-Stage-Partial-connections (CSP), Cross mini-Batch Normalization (CmBN), Self-adversarial-training (SAT) and Mish-activation. We use new features: WRC, CSP, CmBN, SAT, Mish activation, Mosaic data augmentation, CmBN, DropBlock regularization, and CIoU loss, and combine some of them to achieve state-of-the-art results: 43.5% AP (65.7% AP50) for the MS COCO dataset at a realtime speed of 65 FPS on Tesla V100.

"},{"location":"modelzoo/yolov4/#results","title":"Results","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 45.4 27.6M yaml weights YOLOv4 CSPDarknet53(silu) 16 * 8 608 MS COCO 2017 45.8 27.6M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 46.1 337.25 27.6M yaml weights"},{"location":"modelzoo/yolov4/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
"},{"location":"modelzoo/yolov4/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolov4/#training","title":"Training","text":""},{"location":"modelzoo/yolov4/#-pretraining-model","title":"- Pretraining Model","text":"

You can get the pre-training model trained on ImageNet2012 from here.

To convert it to a ckpt file loadable by mindyolo, please put it in the root directory and then run:

python mindyolo/utils/convert_weight_cspdarknet53.py\n

"},{"location":"modelzoo/yolov4/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

"},{"location":"modelzoo/yolov4/#notes_1","title":"Notes","text":"
  • As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.
  • If the following warning occurs, setting the environment variable PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' before launching training will fix it (see the sketch after this list).
    multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache))\n
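
Because PYTHONWARNINGS is read when the Python interpreter starts, it has to be set before train.py launches, e.g. by exporting it in the shell. A minimal Python wrapper sketch, assuming you prefer to set it programmatically (the command line mirrors the distributed example above):

# launch training with PYTHONWARNINGS preset in the child environment (illustrative wrapper)\nimport os\nimport subprocess\n\nenv = dict(os.environ, PYTHONWARNINGS=\"ignore:semaphore_tracker:UserWarning\")\nsubprocess.run(\n    [\"mpirun\", \"-n\", \"8\", \"python\", \"train.py\",\n     \"--config\", \"./configs/yolov4/yolov4-silu.yaml\",\n     \"--device_target\", \"Ascend\", \"--is_parallel\", \"True\", \"--epochs\", \"320\"],\n    env=env, check=True,\n)\n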
"},{"location":"modelzoo/yolov4/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

# standalone training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320\n
"},{"location":"modelzoo/yolov4/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolov4/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolov4/#references","title":"References","text":"

[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020.

"},{"location":"modelzoo/yolov5/","title":"YOLOv5","text":""},{"location":"modelzoo/yolov5/#abstract","title":"Abstract","text":"

YOLOv5 is a family of object detection architectures and models pretrained on the COCO dataset, representing Ultralytics open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.

"},{"location":"modelzoo/yolov5/#results","title":"Results","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv5 N 32 * 8 640 MS COCO 2017 27.3 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 7.2M yaml weights YOLOv5 M 32 * 8 640 MS COCO 2017 44.9 21.2M yaml weights YOLOv5 L 32 * 8 640 MS COCO 2017 48.5 46.5M yaml weights YOLOv5 X 16 * 8 640 MS COCO 2017 50.5 86.7M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv5 N 32 * 8 640 MS COCO 2017 27.4 736.08 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 787.34 7.2M yaml weights"},{"location":"modelzoo/yolov5/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
  • We refer to the official YOLOV5 to reproduce the P5 series models; the main difference is that we train on 8 NPUs (Ascend 910) with a per-NPU batch size of 32, unlike the official code.
"},{"location":"modelzoo/yolov5/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolov5/#training","title":"Training","text":""},{"location":"modelzoo/yolov5/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

"},{"location":"modelzoo/yolov5/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

# standalone training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend\n
"},{"location":"modelzoo/yolov5/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolov5/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolov5/#references","title":"References","text":"

[1] Jocher Glenn. YOLOv5 release v6.1. https://github.com/ultralytics/yolov5/releases/tag/v6.1, 2022.

"},{"location":"modelzoo/yolov7/","title":"YOLOv7","text":"

YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors

"},{"location":"modelzoo/yolov7/#abstract","title":"Abstract","text":"

YOLOv7 surpasses all known object detectors in both speed and accuracy in the range from 5 FPS to 160 FPS and has the highest accuracy 56.8% AP among all known real-time object detectors with 30 FPS or higher on GPU V100. YOLOv7-E6 object detector (56 FPS V100, 55.9% AP) outperforms both transformer-based detector SWIN-L Cascade-Mask R-CNN (9.2 FPS A100, 53.9% AP) by 509% in speed and 2% in accuracy, and convolutional-based detector ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2% AP) by 551% in speed and 0.7% AP in accuracy, as well as YOLOv7 outperforms: YOLOR, YOLOX, Scaled-YOLOv4, YOLOv5, DETR, Deformable DETR, DINO-5scale-R50, ViT-Adapter-B and many other object detectors in speed and accuracy. Moreover, we train YOLOv7 only on MS COCO dataset from scratch without using any other datasets or pre-trained weights.

"},{"location":"modelzoo/yolov7/#results","title":"Results","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 6.2M yaml weights YOLOv7 L 16 * 8 640 MS COCO 2017 50.8 36.9M yaml weights YOLOv7 X 12 * 8 640 MS COCO 2017 52.4 71.3M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 496.21 6.2M yaml weights"},{"location":"modelzoo/yolov7/#notes","title":"Notes","text":"
  • Context: The training context is denoted as {device}x{pieces}-{MS mode}, where the MindSpore mode can be G (graph mode) or F (pynative mode with ms function). For example, D910x8-G means training on 8 Ascend 910 NPUs in graph mode.
  • Box mAP: Accuracy reported on the validation set.
  • We refer to the official YOLOV7 to reproduce the P5 series models; the main difference is that we train on 8 NPUs (Ascend 910) with per-NPU batch sizes of 16/16/12 for tiny/l/x, unlike the official code.
"},{"location":"modelzoo/yolov7/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolov7/#training","title":"Training","text":""},{"location":"modelzoo/yolov7/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

"},{"location":"modelzoo/yolov7/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

# standalone training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend\n
"},{"location":"modelzoo/yolov7/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolov7/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolov7/#references","title":"References","text":"

[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022.

"},{"location":"modelzoo/yolov8/","title":"YOLOv8","text":""},{"location":"modelzoo/yolov8/#abstract","title":"Abstract","text":"

Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation and image classification tasks.

"},{"location":"modelzoo/yolov8/#results","title":"Results","text":""},{"location":"modelzoo/yolov8/#detection","title":"Detection","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.2 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.6 11.2M yaml weights YOLOv8 M 16 * 8 640 MS COCO 2017 50.5 25.9M yaml weights YOLOv8 L 16 * 8 640 MS COCO 2017 52.8 43.7M yaml weights YOLOv8 X 16 * 8 640 MS COCO 2017 53.7 68.2M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.3 373.55 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.7 365.53 11.2M yaml weights"},{"location":"modelzoo/yolov8/#segmentation","title":"Segmentation","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Mask mAP (%) Params Recipe Download YOLOv8-seg X 16 * 8 640 MS COCO 2017 52.5 42.9 71.8M yaml weights"},{"location":"modelzoo/yolov8/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
  • We refer to the official YOLOV8 to reproduce the P5 series model.
"},{"location":"modelzoo/yolov8/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolov8/#training","title":"Training","text":""},{"location":"modelzoo/yolov8/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction or adjust the learning rate linearly to a new global batch size.

"},{"location":"modelzoo/yolov8/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please run:

# standalone training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend\n
"},{"location":"modelzoo/yolov8/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolov8/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolov8/#references","title":"References","text":"

[1] Jocher Glenn. Ultralytics YOLOv8. https://github.com/ultralytics/ultralytics, 2023.

"},{"location":"modelzoo/yolox/","title":"YOLOX","text":""},{"location":"modelzoo/yolox/#abstract","title":"Abstract","text":"

YOLOX is a new high-performance detector with some experienced improvements to YOLO series. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model.

"},{"location":"modelzoo/yolox/#results","title":"Results","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOX N 8 * 8 416 MS COCO 2017 24.1 0.9M yaml weights YOLOX Tiny 8 * 8 416 MS COCO 2017 33.3 5.1M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 40.7 9.0M yaml weights YOLOX M 8 * 8 640 MS COCO 2017 46.7 25.3M yaml weights YOLOX L 8 * 8 640 MS COCO 2017 49.2 54.2M yaml weights YOLOX X 8 * 8 640 MS COCO 2017 51.6 99.1M yaml weights YOLOX Darknet53 8 * 8 640 MS COCO 2017 47.7 63.7M yaml weights performance tested on Ascend 910*(8p) Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOX S 8 * 8 640 MS COCO 2017 41.0 242.15 9.0M yaml weights"},{"location":"modelzoo/yolox/#notes","title":"Notes","text":"
  • Box mAP: Accuracy reported on the validation set.
  • We refer to the official YOLOX to reproduce the results.
"},{"location":"modelzoo/yolox/#quick-start","title":"Quick Start","text":"

Please refer to the QUICK START in MindYOLO for details.

"},{"location":"modelzoo/yolox/#training","title":"Training","text":""},{"location":"modelzoo/yolox/#-distributed-training","title":"- Distributed Training","text":"

It is easy to reproduce the reported results with the pre-defined training recipe. For distributed training on multiple Ascend 910 devices, please run

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True\n

If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed illustration of all hyper-parameters, please refer to config.py.

Note: As the global batch size (batch_size x num_devices) is an important hyper-parameter, it is recommended to keep the global batch size unchanged for reproduction.

"},{"location":"modelzoo/yolox/#-standalone-training","title":"- Standalone Training","text":"

If you want to train or finetune the model on a smaller dataset without distributed training, please first run:

# standalone 1st stage training on a CPU/GPU/Ascend device\npython train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend\n
"},{"location":"modelzoo/yolox/#validation-and-test","title":"Validation and Test","text":"

To validate the accuracy of the trained model, you can use test.py and pass the checkpoint path with --weight.

python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"modelzoo/yolox/#deployment","title":"Deployment","text":"

See here.

"},{"location":"modelzoo/yolox/#references","title":"References","text":"

[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. https://arxiv.org/abs/2107.08430, 2021.

"},{"location":"notes/changelog/","title":"Change Log","text":"

Coming soon.

"},{"location":"notes/code_of_conduct/","title":"Code of Conduct","text":"

Coming soon.

"},{"location":"notes/contributing/","title":"MindYOLO contributing guidelines","text":""},{"location":"notes/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"

You are required to sign the CLA before your first code submission to the MindYOLO community.

For individual contributors, please refer to the ICLA online document for detailed information.

"},{"location":"notes/contributing/#getting-started","title":"Getting Started","text":"
  • Fork the repository on Github.
  • Read the README.md.
"},{"location":"notes/contributing/#contribution-workflow","title":"Contribution Workflow","text":""},{"location":"notes/contributing/#code-style","title":"Code style","text":"

Please follow this style to make MindYOLO easy to review, maintain and develop.

  • Coding guidelines

    The Python coding style suggested by Python PEP 8 Coding Style and the C++ coding style suggested by the Google C++ Coding Guidelines are used in the MindYOLO community. CppLint, CppCheck, CMakeLint, CodeSpell, Lizard, ShellCheck and PyLint are used to check the code format; installing these plugins in your IDE is recommended.

  • Unittest guidelines

    The Python unittest style suggested by pytest and the C++ unittest style suggested by Googletest Primer are used in the MindYOLO community. The design intent of a test case should be reflected in its name or comment.

  • Refactoring guidelines

    We encourage developers to refactor our code to eliminate code smells. All code should conform to the coding style and testing style, and refactored code is no exception. The Lizard threshold for nloc (lines of code without comments) is 100 and for cnc (cyclomatic complexity number) is 20; when you receive a Lizard warning, you have to refactor the code you want to merge.

  • Document guidelines

    We use MarkdownLint to check the format of markdown documents. MindYOLO CI modifies the following rules based on the default configuration.

    • MD007 (unordered list indentation): The indent parameter is set to 4, indicating that all content in the unordered list needs to be indented using four spaces.
    • MD009 (spaces at the line end): The br_spaces parameter is set to 2, indicating that there can be 0 or 2 spaces at the end of a line.
    • MD029 (sequence numbers of an ordered list): The style parameter is set to ordered, indicating that the sequence numbers of the ordered list are in ascending order.

    For details, please refer to RULES.

"},{"location":"notes/contributing/#fork-pull-development-model","title":"Fork-Pull development model","text":"
  • Fork MindYOLO repository

    Before submitting code to the MindYOLO project, please make sure that this project has been forked to your own repository. This means that the MindYOLO repository and your own repository will be developed in parallel, so be careful to avoid inconsistency between them.

  • Clone the remote repository

    If you want to download the code to your local machine, git is the best way:

    # For GitHub\ngit clone https://github.com/{insert_your_forked_repo}/mindyolo.git\ngit remote add upstream https://github.com/mindspore-lab/mindyolo.git\n
  • Develop code locally

    To avoid inconsistency between multiple branches, checking out a new branch is SUGGESTED:

    git checkout -b {new_branch_name} origin/master\n

    The master branch is used as the example here; MindYOLO may create version branches and downstream development branches as needed. Please fix bugs upstream first. After that, you can make your changes to the code.

  • Push the code to the remote repository

    After updating the code, you should push the update as follows:

    git add .\ngit status # Check the update status\ngit commit -m \"Your commit title\"\ngit commit -s --amend #Add the concrete description of your commit\ngit push origin {new_branch_name}\n
  • Pull a request to MindYOLO repository

    In the last step, you need to open a pull request comparing your new branch with the MindYOLO master branch. After the pull request is created, the Jenkins CI will be triggered automatically to run build tests. Your pull request should be merged into the upstream master branch as soon as possible to reduce the risk of merge conflicts.

"},{"location":"notes/contributing/#report-issues","title":"Report issues","text":"

A great way to contribute to the project is to send a detailed report when you encounter an issue. We always appreciate a well-written, thorough bug report, and will thank you for it!

When reporting issues, refer to this format:

  • What versions of the environment (MindSpore, OS, Python, MindYOLO, etc.) are you using?
  • Is this a BUG REPORT or a FEATURE REQUEST?
  • What kind of issue is it? Add labels to highlight it on the issue dashboard.
  • What happened?
  • What did you expect to happen?
  • How can it be reproduced? (As minimally and precisely as possible.)
  • Any special notes for your reviewers?

Issues advisory:

  • If you find an open issue that is exactly the one you are going to solve, please leave a comment on that issue to tell others you will take charge of it.
  • If an issue has been open for a while, it is recommended that contributors double-check its status before working on it.
  • If you resolve an issue that you reported yourself, you are also required to let others know before closing it.
  • If you want the issue to be responded to as quickly as possible, please label it; you can find the available labels on the Label List.
"},{"location":"notes/contributing/#propose-prs","title":"Propose PRs","text":"
  • Raise your idea as an issue on GitHub
  • If it is a new feature that needs lots of design details, a design proposal should also be submitted.
  • After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR.
  • No PR may be merged until it receives 2+ LGTMs from approvers. Please NOTE that an approver is NOT allowed to add an LGTM to their own PR.
  • After the PR is sufficiently discussed, it will be merged, abandoned or rejected depending on the outcome of the discussion.

PRs advisory:

  • Any irrelevant changes should be avoided.
  • Make sure your commit history is ordered.
  • Always keep your branch up to date with the master branch.
  • For bug-fix PRs, make sure all related issues are linked.
"},{"location":"notes/faq/","title":"FAQ","text":"

Coming soon.

"},{"location":"reference/data/","title":"Data","text":""},{"location":"reference/data/#data-loader","title":"Data Loader","text":""},{"location":"reference/data/#mindyolo.data.loader.create_loader","title":"mindyolo.data.loader.create_loader(dataset, batch_collate_fn, column_names_getitem, column_names_collate, batch_size, epoch_size=1, rank=0, rank_size=1, num_parallel_workers=8, shuffle=True, drop_remainder=False, python_multiprocessing=False)","text":"

Creates dataloader.

Applies operations such as transform and batch to the ms.dataset.Dataset object created by the create_dataset function to get the dataloader.

PARAMETER DESCRIPTION dataset

dataset object created by create_dataset.

TYPE: COCODataset

batch_size

The number of rows each batch is created with. An int or callable object which takes exactly 1 parameter, BatchInfo.

TYPE: int or function

drop_remainder

Determines whether to drop the last block whose data row number is less than batch size (default=False). If True, and if there are less than batch_size rows available to make the last batch, then those rows will be dropped and not propagated to the child node.

TYPE: bool DEFAULT: False

num_parallel_workers

Number of workers (threads) used to process the dataset in parallel (default=8).

TYPE: int DEFAULT: 8

python_multiprocessing

Parallelize Python operations with multiple worker processes. This option could be beneficial if the Python operation is computationally heavy (default=False).

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION

BatchDataset, dataset batched.

Source code in mindyolo/data/loader.py
def create_loader(\n    dataset,\n    batch_collate_fn,\n    column_names_getitem,\n    column_names_collate,\n    batch_size,\n    epoch_size=1,\n    rank=0,\n    rank_size=1,\n    num_parallel_workers=8,\n    shuffle=True,\n    drop_remainder=False,\n    python_multiprocessing=False,\n):\nr\"\"\"Creates dataloader.\n\n    Applies operations such as transform and batch to the `ms.dataset.Dataset` object\n    created by the `create_dataset` function to get the dataloader.\n\n    Args:\n        dataset (COCODataset): dataset object created by `create_dataset`.\n        batch_size (int or function): The number of rows each batch is created with. An\n            int or callable object which takes exactly 1 parameter, BatchInfo.\n        drop_remainder (bool, optional): Determines whether to drop the last block\n            whose data row number is less than batch size (default=False). If True, and if there are less\n            than batch_size rows available to make the last batch, then those rows will\n            be dropped and not propagated to the child node.\n        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel\n            (default=None).\n        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This\n            option could be beneficial if the Python operation is computational heavy (default=False).\n\n    Returns:\n        BatchDataset, dataset batched.\n    \"\"\"\n    de.config.set_seed(1236517205 + rank)\n    cores = multiprocessing.cpu_count()\n    num_parallel_workers = min(int(cores / rank_size), num_parallel_workers)\n    logger.info(f\"Dataloader num parallel workers: [{num_parallel_workers}]\")\n    if rank_size > 1:\n        ds = de.GeneratorDataset(\n            dataset,\n            column_names=column_names_getitem,\n            num_parallel_workers=min(8, num_parallel_workers),\n            shuffle=shuffle,\n            python_multiprocessing=python_multiprocessing,\n            num_shards=rank_size,\n            shard_id=rank,\n        )\n    else:\n        ds = de.GeneratorDataset(\n            dataset,\n            column_names=column_names_getitem,\n            num_parallel_workers=min(32, num_parallel_workers),\n            shuffle=shuffle,\n            python_multiprocessing=python_multiprocessing,\n        )\n    ds = ds.batch(\n        batch_size, per_batch_map=batch_collate_fn,\n        input_columns=column_names_getitem, output_columns=column_names_collate, drop_remainder=drop_remainder\n    )\n    ds = ds.repeat(epoch_size)\n\n    return ds\n
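
A minimal wiring sketch for create_loader, assuming a COCODataset built from a local ./coco/train2017.txt and a toy collate function written only for illustration (in practice the collate function, transforms and column names are assembled by MindYOLO's training script from the YAML recipe):

# illustrative wiring of COCODataset + create_loader; the toy collate below only stacks images\n# and packs [batch_idx, cls, x, y, w, h] labels, it is NOT the collate used by MindYOLO's train.py\nimport numpy as np\nfrom mindyolo.data.dataset import COCODataset\nfrom mindyolo.data.loader import create_loader\n\ndataset = COCODataset(\n    dataset_path=\"./coco/train2017.txt\",           # layout as described in the Dataset section below\n    img_size=640,\n    transforms_dict=[{\"func_name\": \"letterbox\"}],  # assumed minimal transform list\n    is_training=True,\n    batch_size=16,\n)\n\ndef toy_collate(samples, batch_info):\n    images = np.stack([s[\"img\"] for s in samples]).astype(np.float32)\n    labels = [np.concatenate([np.full_like(s[\"cls\"], k), s[\"cls\"], s[\"bboxes\"]], axis=1)\n              for k, s in enumerate(samples) if len(s[\"cls\"])]\n    labels = np.concatenate(labels, 0) if labels else np.zeros((0, 6), np.float32)\n    return images, labels\n\nloader = create_loader(\n    dataset=dataset,\n    batch_collate_fn=toy_collate,\n    column_names_getitem=dataset.column_names_getitem,   # ['samples']\n    column_names_collate=dataset.column_names_collate,   # ['images', 'labels'] when is_training=True\n    batch_size=16,\n    shuffle=True,\n    drop_remainder=True,\n)\nfor images, labels in loader.create_tuple_iterator(output_numpy=True):\n    print(images.shape, labels.shape)\n    break\n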
"},{"location":"reference/data/#dataset","title":"Dataset","text":""},{"location":"reference/data/#mindyolo.data.dataset.COCODataset","title":"mindyolo.data.dataset.COCODataset","text":"

Load the COCO dataset (yolo format coco labels)

PARAMETER DESCRIPTION dataset_path

label directory or label list file of the dataset.

TYPE: str DEFAULT: ''

For example, with the following layout, dataset_path would be ./coco/train2017.txt:

COCO_ROOT
├── train2017.txt
├── annotations
│   └── instances_train2017.json
├── images
│   └── train2017
│       ├── 000000000001.jpg
│       └── 000000000002.jpg
└── labels
    └── train2017
        ├── 000000000001.txt
        └── 000000000002.txt

transforms

A list of image data augmentation operations applied to the dataset samples in order.

TYPE: list
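
For illustration, a transforms_dict of the shape the dataset expects: each entry names one of the dataset's augmentation methods via func_name, with an optional prob and keyword arguments passed through to that method (the values below are an example only; the pipelines actually used by MindYOLO are defined in the model YAML recipes):

# example transforms_dict (illustrative values; real pipelines come from the YAML recipes)\ntransforms_dict = [\n    {\"func_name\": \"mosaic\", \"prob\": 1.0},      # 4-/9-image mosaic\n    {\"func_name\": \"resample_segments\"},        # up-sample polygon segments to a fixed length\n    {\"func_name\": \"random_perspective\", \"prob\": 1.0, \"degrees\": 0.0, \"translate\": 0.1, \"scale\": 0.5},\n    {\"func_name\": \"letterbox\"},                # resize/pad to img_size (or the batch shape when rect=True)\n]\n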

Source code in mindyolo/data/dataset.py
class COCODataset:\n\"\"\"\n    Load the COCO dataset (yolo format coco labels)\n\n    Args:\n        dataset_path (str): dataset label directory for dataset.\n        for example:\n            COCO_ROOT\n                \u251c\u2500\u2500 train2017.txt\n                \u251c\u2500\u2500 annotations\n                \u2502     \u2514\u2500\u2500 instances_train2017.json\n                \u251c\u2500\u2500 images\n                \u2502     \u2514\u2500\u2500 train2017\n                \u2502             \u251c\u2500\u2500 000000000001.jpg\n                \u2502             \u2514\u2500\u2500 000000000002.jpg\n                \u2514\u2500\u2500 labels\n                      \u2514\u2500\u2500 train2017\n                              \u251c\u2500\u2500 000000000001.txt\n                              \u2514\u2500\u2500 000000000002.txt\n            dataset_path (str): ./coco/train2017.txt\n        transforms (list): A list of images data enhancements\n            that apply data enhancements on data set objects in order.\n    \"\"\"\n\n    def __init__(\n        self,\n        dataset_path=\"\",\n        img_size=640,\n        transforms_dict=None,\n        is_training=False,\n        augment=False,\n        rect=False,\n        single_cls=False,\n        batch_size=32,\n        stride=32,\n        num_cls=80,\n        pad=0.0,\n        return_segments=False,  # for segment\n        return_keypoints=False, # for keypoint\n        nkpt=0,                 # for keypoint\n        ndim=0                  # for keypoint\n    ):\n        # acceptable image suffixes\n        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']\n        self.cache_version = 0.2\n\n        self.return_segments = return_segments\n        self.return_keypoints = return_keypoints\n        assert not (return_segments and return_keypoints), 'Can not return both segments and keypoints.'\n\n        self.path = dataset_path\n        self.img_size = img_size\n        self.augment = augment\n        self.rect = rect\n        self.stride = stride\n        self.num_cls = num_cls\n        self.nkpt = nkpt\n        self.ndim = ndim\n        self.transforms_dict = transforms_dict\n        self.is_training = is_training\n\n        # set column names\n        self.column_names_getitem = ['samples']\n        if self.is_training:\n            self.column_names_collate = ['images', 'labels']\n            if self.return_segments:\n                self.column_names_collate = ['images', 'labels', 'masks']\n            elif self.return_keypoints:\n                self.column_names_collate = ['images', 'labels', 'keypoints']\n        else:\n            self.column_names_collate = [\"images\", \"img_files\", \"hw_ori\", \"hw_scale\", \"pad\"]\n\n        try:\n            f = []  # image files\n            for p in self.path if isinstance(self.path, list) else [self.path]:\n                p = Path(p)  # os-agnostic\n                if p.is_dir():  # dir\n                    f += glob.glob(str(p / \"**\" / \"*.*\"), recursive=True)\n                elif p.is_file():  # file\n                    with open(p, \"r\") as t:\n                        t = t.read().strip().splitlines()\n                        parent = str(p.parent) + os.sep\n                        f += [x.replace(\"./\", parent) if x.startswith(\"./\") else x for x in t]  # local to global path\n                else:\n                    raise Exception(f\"{p} does not exist\")\n            self.img_files = sorted([x.replace(\"/\", os.sep) for x in 
f if x.split(\".\")[-1].lower() in self.img_formats])\n            assert self.img_files, f\"No images found\"\n        except Exception as e:\n            raise Exception(f\"Error loading data from {self.path}: {e}\\n\")\n\n        # Check cache\n        self.label_files = self._img2label_paths(self.img_files)  # labels\n        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(\".cache.npy\")  # cached labels\n        if cache_path.is_file():\n            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict\n            if cache[\"version\"] == self.cache_version \\\n                    and cache[\"hash\"] == self._get_hash(self.label_files + self.img_files):\n                logger.info(f\"Dataset Cache file hash/version check success.\")\n                logger.info(f\"Load dataset cache from [{cache_path}] success.\")\n            else:\n                logger.info(f\"Dataset cache file hash/version check fail.\")\n                logger.info(f\"Datset caching now...\")\n                cache, exists = self.cache_labels(cache_path), False  # cache\n                logger.info(f\"Dataset caching success.\")\n        else:\n            logger.info(f\"No dataset cache available, caching now...\")\n            cache, exists = self.cache_labels(cache_path), False  # cache\n            logger.info(f\"Dataset caching success.\")\n\n        # Display cache\n        nf, nm, ne, nc, n = cache.pop(\"results\")  # found, missing, empty, corrupted, total\n        if exists:\n            d = f\"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted\"\n            tqdm(None, desc=d, total=n, initial=n)  # display cache results\n        assert nf > 0 or not augment, f\"No labels in {cache_path}. Can not train without labels.\"\n\n        # Read cache\n        cache.pop(\"hash\")  # remove hash\n        cache.pop(\"version\")  # remove version\n        self.labels = cache['labels']\n        self.img_files = [lb['im_file'] for lb in self.labels]  # update im_files\n\n        # Check if the dataset is all boxes or all segments\n        lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in self.labels)\n        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))\n        if len_segments and len_boxes != len_segments:\n            print(\n                f'WARNING \u26a0\ufe0f Box and segment counts should be equal, but got len(segments) = {len_segments}, '\n                f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. 
'\n                'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.')\n            for lb in self.labels:\n                lb['segments'] = []\n        if len_cls == 0:\n            raise ValueError(f'All labels empty in {cache_path}, can not start training without labels.')\n\n        if single_cls:\n            for x in self.labels:\n                x['cls'][:, 0] = 0\n\n        n = len(self.labels)  # number of images\n        bi = np.floor(np.arange(n) / batch_size).astype(np.int_)  # batch index\n        nb = bi[-1] + 1  # number of batches\n        self.batch = bi  # batch index of image\n\n        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)\n        self.imgs, self.img_hw_ori, self.indices = None, None, range(n)\n\n        # Rectangular Train/Test\n        if self.rect:\n            # Sort by aspect ratio\n            s = self.img_shapes  # wh\n            ar = s[:, 1] / s[:, 0]  # aspect ratio\n            irect = ar.argsort()\n            self.img_files = [self.img_files[i] for i in irect]\n            self.label_files = [self.label_files[i] for i in irect]\n            self.labels = [self.labels[i] for i in irect]\n            self.img_shapes = s[irect]  # wh\n            ar = ar[irect]\n\n            # Set training image shapes\n            shapes = [[1, 1]] * nb\n            for i in range(nb):\n                ari = ar[bi == i]\n                mini, maxi = ari.min(), ari.max()\n                if maxi < 1:\n                    shapes[i] = [maxi, 1]\n                elif mini > 1:\n                    shapes[i] = [1, 1 / mini]\n\n            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int_) * stride\n\n        self.imgIds = [int(Path(im_file).stem) for im_file in self.img_files]\n\n    def cache_labels(self, path=Path(\"./labels.cache.npy\")):\n        # Cache dataset labels, check images and read shapes\n        x = {'labels': []}  # dict\n        nm, nf, ne, nc, segments, keypoints = 0, 0, 0, 0, [], None  # number missing, found, empty, duplicate\n        pbar = tqdm(zip(self.img_files, self.label_files), desc=\"Scanning images\", total=len(self.img_files))\n        if self.return_keypoints and (self.nkpt <= 0 or self.ndim not in (2, 3)):\n            raise ValueError(\"'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of \"\n                             \"keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 
'kpt_shape: [17, 3]'\")\n        for i, (im_file, lb_file) in enumerate(pbar):\n            try:\n                # verify images\n                im = Image.open(im_file)\n                im.verify()  # PIL verify\n                shape = self._exif_size(im)  # image size\n                segments = []  # instance segments\n                assert (shape[0] > 9) & (shape[1] > 9), f\"image size {shape} <10 pixels\"\n                assert im.format.lower() in self.img_formats, f\"invalid image format {im.format}\"\n\n                # verify labels\n                if os.path.isfile(lb_file):\n                    nf += 1  # label found\n                    with open(lb_file, \"r\") as f:\n                        lb = [x.split() for x in f.read().strip().splitlines()]\n                        if any([len(x) > 6 for x in lb]) and (not self.return_keypoints):  # is segment\n                            classes = np.array([x[0] for x in lb], dtype=np.float32)\n                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)\n                            lb = np.concatenate(\n                                (classes.reshape(-1, 1), segments2boxes(segments)), 1\n                            )  # (cls, xywh)\n                        lb = np.array(lb, dtype=np.float32)\n                    nl = len(lb)\n                    if nl:\n                        if self.return_keypoints:\n                            assert lb.shape[1] == (5 + self.nkpt * self.ndim), \\\n                                f'labels require {(5 + self.nkpt * self.ndim)} columns each'\n                            assert (lb[:, 5::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'\n                            assert (lb[:, 6::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'\n                        else:\n                            assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'\n                            assert (lb[:, 1:] <= 1).all(), \\\n                                f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'\n                            assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'\n                        # All labels\n                        max_cls = int(lb[:, 0].max())  # max label count\n                        assert max_cls <= self.num_cls, \\\n                            f'Label class {max_cls} exceeds dataset class count {self.num_cls}. 
' \\\n                            f'Possible class labels are 0-{self.num_cls - 1}'\n                        _, j = np.unique(lb, axis=0, return_index=True)\n                        if len(j) < nl:  # duplicate row check\n                            lb = lb[j]  # remove duplicates\n                            if segments:\n                                segments = [segments[x] for x in i]\n                            print(f'WARNING \u26a0\ufe0f {im_file}: {nl - len(j)} duplicate labels removed')\n                    else:\n                        ne += 1  # label empty\n                        lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \\\n                            if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)\n                else:\n                    nm += 1  # label missing\n                    lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \\\n                        if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)\n                if self.return_keypoints:\n                    keypoints = lb[:, 5:].reshape(-1, self.nkpt, self.ndim)\n                    if self.ndim == 2:\n                        kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)\n                        kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)\n                        kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)\n                        keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)\n                lb = lb[:, :5]\n                x['labels'].append(\n                    dict(\n                        im_file=im_file,\n                        cls=lb[:, 0:1],     # (n, 1)\n                        bboxes=lb[:, 1:],   # (n, 4)\n                        segments=segments,  # list of (mi, 2)\n                        keypoints=keypoints,\n                        bbox_format='xywhn',\n                        segment_format='polygon'\n                    )\n                )\n            except Exception as e:\n                nc += 1\n                print(f\"WARNING: Ignoring corrupted image and/or label {im_file}: {e}\")\n\n            pbar.desc = f\"Scanning '{path.parent / path.stem}' images and labels... 
\" \\\n                        f\"{nf} found, {nm} missing, {ne} empty, {nc} corrupted\"\n        pbar.close()\n\n        if nf == 0:\n            print(f\"WARNING: No labels found in {path}.\")\n\n        x[\"hash\"] = self._get_hash(self.label_files + self.img_files)\n        x[\"results\"] = nf, nm, ne, nc, len(self.img_files)\n        x[\"version\"] = self.cache_version  # cache version\n        np.save(path, x)  # save for next time\n        logger.info(f\"New cache created: {path}\")\n        return x\n\n    def __getitem__(self, index):\n        sample = self.get_sample(index)\n\n        for _i, ori_trans in enumerate(self.transforms_dict):\n            _trans = ori_trans.copy()\n            func_name, prob = _trans.pop(\"func_name\"), _trans.pop(\"prob\", 1.0)\n            if func_name == 'copy_paste':\n                sample = self.copy_paste(sample, prob)\n            elif random.random() < prob:\n                if func_name == \"albumentations\" and getattr(self, \"albumentations\", None) is None:\n                    self.albumentations = Albumentations(size=self.img_size, **_trans)\n                if func_name == \"letterbox\":\n                    new_shape = self.img_size if not self.rect else self.batch_shapes[self.batch[index]]\n                    sample = self.letterbox(sample, new_shape, **_trans)\n                else:\n                    sample = getattr(self, func_name)(sample, **_trans)\n\n        sample['img'] = np.ascontiguousarray(sample['img'])\n        return sample\n\n    def __len__(self):\n        return len(self.img_files)\n\n    def get_sample(self, index):\n\"\"\"Get and return label information from the dataset.\"\"\"\n        sample = deepcopy(self.labels[index])\n        if self.imgs is None:\n            path = self.img_files[index]\n            img = cv2.imread(path)  # BGR\n            assert img is not None, \"Image Not Found \" + path\n            h_ori, w_ori = img.shape[:2]  # orig hw\n            r = self.img_size / max(h_ori, w_ori)  # resize image to img_size\n            if r != 1:  # always resize down, only resize up if training with augmentation\n                interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR\n                img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)\n\n            sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original\n\n        else:\n            sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original\n\n        return sample\n\n    def mosaic(\n        self,\n        sample,\n        mosaic9_prob=0.0,\n        post_transform=None,\n    ):\n        segment_format = sample['segment_format']\n        bbox_format = sample['bbox_format']\n        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'\n\n        mosaic9_prob = min(1.0, max(mosaic9_prob, 0.0))\n        if random.random() < (1 - mosaic9_prob):\n            sample = self._mosaic4(sample)\n        else:\n            sample = self._mosaic9(sample)\n\n        if post_transform:\n            for _i, ori_trans in enumerate(post_transform):\n                _trans = ori_trans.copy()\n                func_name, prob = _trans.pop(\"func_name\"), _trans.pop(\"prob\", 1.0)\n                sample = getattr(self, func_name)(sample, **_trans)\n\n        return sample\n\n    def _mosaic4(self, 
sample):\n        # loads images in a 4-mosaic\n        classes4, bboxes4, segments4 = [], [], []\n        mosaic_samples = [sample, ]\n        indices = random.choices(self.indices, k=3)  # 3 additional image indices\n\n        segments_is_list = isinstance(sample['segments'], list)\n        if segments_is_list:\n            mosaic_samples += [self.get_sample(i) for i in indices]\n        else:\n            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]\n\n        s = self.img_size\n        mosaic_border = [-s // 2, -s // 2]\n        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y\n\n        for i, mosaic_sample in enumerate(mosaic_samples):\n            # Load image\n            img = mosaic_sample['img']\n            (h, w) = img.shape[:2]\n\n            # place img in img4\n            if i == 0:  # top left\n                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles\n                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)\n                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)\n            elif i == 1:  # top right\n                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc\n                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h\n            elif i == 2:  # bottom left\n                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)\n                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)\n            elif i == 3:  # bottom right\n                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)\n                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)\n\n            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]\n            padw = x1a - x1b\n            padh = y1a - y1b\n\n            # box and cls\n            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']\n            assert mosaic_sample['bbox_format'] == 'xywhn'\n            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format\n            classes4.append(cls)\n            bboxes4.append(bboxes)\n\n            # seg\n            assert mosaic_sample['segment_format'] == 'polygon'\n            segments = mosaic_sample['segments']\n            if segments_is_list:\n                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]\n                segments4.extend(segments)\n            else:\n                segments = xyn2xy(segments, w, h, padw, padh)\n                segments4.append(segments)\n\n        classes4 = np.concatenate(classes4, 0)\n        bboxes4 = np.concatenate(bboxes4, 0)\n        bboxes4 = bboxes4.clip(0, 2 * s)\n\n        if segments_is_list:\n            for x in segments4:\n                np.clip(x, 0, 2 * s, out=x)\n        else:\n            segments4 = np.concatenate(segments4, 0)\n            segments4 = segments4.clip(0, 2 * s)\n\n        sample['img'] = img4\n        sample['cls'] = classes4\n        sample['bboxes'] = bboxes4\n        sample['bbox_format'] = 'ltrb'\n        sample['segments'] = segments4\n        sample['mosaic_border'] = mosaic_border\n\n        return sample\n\n    def _mosaic9(self, sample):\n        # loads images in a 9-mosaic\n        classes9, bboxes9, segments9 = [], [], []\n        mosaic_samples = [sample, ]\n        indices = 
random.choices(self.indices, k=8)  # 8 additional image indices\n\n        segments_is_list = isinstance(sample['segments'], list)\n        if segments_is_list:\n            mosaic_samples += [self.get_sample(i) for i in indices]\n        else:\n            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]\n        s = self.img_size\n        mosaic_border = [-s // 2, -s // 2]\n\n        for i, mosaic_sample in enumerate(mosaic_samples):\n            # Load image\n            img = mosaic_sample['img']\n            (h, w) = img.shape[:2]\n\n            # place img in img9\n            if i == 0:  # center\n                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles\n                h0, w0 = h, w\n                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates\n            elif i == 1:  # top\n                c = s, s - h, s + w, s\n            elif i == 2:  # top right\n                c = s + wp, s - h, s + wp + w, s\n            elif i == 3:  # right\n                c = s + w0, s, s + w0 + w, s + h\n            elif i == 4:  # bottom right\n                c = s + w0, s + hp, s + w0 + w, s + hp + h\n            elif i == 5:  # bottom\n                c = s + w0 - w, s + h0, s + w0, s + h0 + h\n            elif i == 6:  # bottom left\n                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h\n            elif i == 7:  # left\n                c = s - w, s + h0 - h, s, s + h0\n            elif i == 8:  # top left\n                c = s - w, s + h0 - hp - h, s, s + h0 - hp\n\n            padx, pady = c[:2]\n            x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords\n\n            # box and cls\n            assert mosaic_sample['bbox_format'] == 'xywhn'\n            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']\n            bboxes = xywhn2xyxy(bboxes, w, h, padx, pady)  # normalized xywh to pixel xyxy format\n            classes9.append(cls)\n            bboxes9.append(bboxes)\n\n            # seg\n            assert mosaic_sample['segment_format'] == 'polygon'\n            segments = mosaic_sample['segments']\n            if segments_is_list:\n                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]\n                segments9.extend(segments)\n            else:\n                segments = xyn2xy(segments, w, h, padx, pady)\n                segments9.append(segments)\n\n            # Image\n            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]\n            hp, wp = h, w  # height, width previous\n\n        # Offset\n        yc, xc = [int(random.uniform(0, s)) for _ in mosaic_border]  # mosaic center x, y\n        img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s]\n\n        # Concat/clip labels\n        classes9 = np.concatenate(classes9, 0)\n        bboxes9 = np.concatenate(bboxes9, 0)\n        bboxes9[:, [0, 2]] -= xc\n        bboxes9[:, [1, 3]] -= yc\n        bboxes9 = bboxes9.clip(0, 2 * s)\n\n        if segments_is_list:\n            c = np.array([xc, yc])  # centers\n            segments9 = [x - c for x in segments9]\n            for x in segments9:\n                np.clip(x, 0, 2 * s, out=x)\n        else:\n            segments9 = np.concatenate(segments9, 0)\n            segments9[..., 0] -= xc\n            segments9[..., 1] -= yc\n            segments9 = segments9.clip(0, 2 * s)\n\n        sample['img'] = img9\n        sample['cls'] = classes9\n        sample['bboxes'] = bboxes9\n        
sample['bbox_format'] = 'ltrb'\n        sample['segments'] = segments9\n        sample['mosaic_border'] = mosaic_border\n\n        return sample\n\n    def resample_segments(self, sample, n=1000):\n        segment_format = sample['segment_format']\n        assert segment_format == 'polygon', f'The segment format is should be polygon, but got {segment_format}'\n\n        segments = sample['segments']\n        if len(segments) > 0:\n            # Up-sample an (n,2) segment\n            for i, s in enumerate(segments):\n                s = np.concatenate((s, s[0:1, :]), axis=0)\n                x = np.linspace(0, len(s) - 1, n)\n                xp = np.arange(len(s))\n                segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy\n            segments = np.stack(segments, axis=0)\n        else:\n            segments = np.zeros((0, 1000, 2), dtype=np.float32)\n        sample['segments'] = segments\n        return sample\n\n    def copy_paste(self, sample, probability=0.5):\n        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)\n        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']\n        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'\n        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n\n        img = sample['img']\n        cls = sample['cls']\n        bboxes = sample['bboxes']\n        segments = sample['segments']\n\n        n = len(segments)\n        if probability and n:\n            h, w, _ = img.shape  # height, width, channels\n            im_new = np.zeros(img.shape, np.uint8)\n            for j in random.sample(range(n), k=round(probability * n)):\n                c, l, s = cls[j], bboxes[j], segments[j]\n                box = w - l[2], l[1], w - l[0], l[3]\n                ioa = bbox_ioa(box, bboxes)  # intersection over area\n                if (ioa < 0.30).all():  # allow 30% obscuration of existing labels\n                    cls = np.concatenate((cls, [c]), 0)\n                    bboxes = np.concatenate((bboxes, [box]), 0)\n                    if isinstance(segments, list):\n                        segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))\n                    else:\n                        segments = np.concatenate((segments, [np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)]), 0)\n                    cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)\n\n            result = cv2.bitwise_and(src1=img, src2=im_new)\n            result = cv2.flip(result, 1)  # augment segments (flip left-right)\n            i = result > 0  # pixels to replace\n            img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug\n\n        sample['img'] = img\n        sample['cls'] = cls\n        sample['bboxes'] = bboxes\n        sample['segments'] = segments\n\n        return sample\n\n    def random_perspective(\n            self, sample, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0)\n    ):\n        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']\n        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'\n        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n\n        img = sample['img']\n        cls = sample['cls']\n        targets 
= sample['bboxes']\n        segments = sample['segments']\n        assert isinstance(segments, np.ndarray), f\"segments type expect numpy.ndarray, but got {type(segments)}; \" \\\n                                                 f\"maybe you should resample_segments before that.\"\n\n        border = sample.pop('mosaic_border', border)\n        height = img.shape[0] + border[0] * 2  # shape(h,w,c)\n        width = img.shape[1] + border[1] * 2\n\n        # Center\n        C = np.eye(3)\n        C[0, 2] = -img.shape[1] / 2  # x translation (pixels)\n        C[1, 2] = -img.shape[0] / 2  # y translation (pixels)\n\n        # Perspective\n        P = np.eye(3)\n        P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)\n        P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)\n\n        # Rotation and Scale\n        R = np.eye(3)\n        a = random.uniform(-degrees, degrees)\n        s = random.uniform(1 - scale, 1 + scale)\n        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)\n\n        # Shear\n        S = np.eye(3)\n        S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)\n        S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)\n\n        # Translation\n        T = np.eye(3)\n        T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)\n        T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)\n\n        # Combined rotation matrix\n        M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT\n        if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed\n            if perspective:\n                img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))\n            else:  # affine\n                img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))\n\n        # Transform label coordinates\n        n = len(targets)\n        if n:\n            use_segments = len(segments)\n            new_bboxes = np.zeros((n, 4))\n            if use_segments:  # warp segments\n                point_num = segments[0].shape[0]\n                new_segments = np.zeros((n, point_num, 2))\n                for i, segment in enumerate(segments):\n                    xy = np.ones((len(segment), 3))\n                    xy[:, :2] = segment\n                    xy = xy @ M.T  # transform\n                    xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine\n\n                    # clip\n                    new_segments[i] = xy\n                    new_bboxes[i] = segment2box(xy, width, height)\n\n            else:  # warp boxes\n                xy = np.ones((n * 4, 3))\n                xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1\n                xy = xy @ M.T  # transform\n                xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine\n\n                # create new boxes\n                x = xy[:, [0, 2, 4, 6]]\n                y = xy[:, [1, 3, 5, 7]]\n                new_bboxes = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T\n\n                # clip\n                new_bboxes[:, [0, 2]] = new_bboxes[:, [0, 2]].clip(0, width)\n                new_bboxes[:, [1, 3]] = new_bboxes[:, [1, 3]].clip(0, 
height)\n\n            # filter candidates\n            i = box_candidates(box1=targets.T * s, box2=new_bboxes.T, area_thr=0.01 if use_segments else 0.10)\n\n            cls = cls[i]\n            targets = new_bboxes[i]\n            sample['cls'] = cls\n            sample['bboxes'] = targets\n            if use_segments:\n                sample['segments'] = segments[i]\n\n        sample['img'] = img\n\n        return sample\n\n    def mixup(self, sample, alpha: 32.0, beta: 32.0, pre_transform=None):\n        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']\n        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'\n        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n\n        index = random.choices(self.indices, k=1)[0]\n        sample2 = self.get_sample(index)\n        if pre_transform:\n            for _i, ori_trans in enumerate(pre_transform):\n                _trans = ori_trans.copy()\n                func_name, prob = _trans.pop(\"func_name\"), _trans.pop(\"prob\", 1.0)\n                if func_name == 'copy_paste':\n                    sample2 = self.copy_paste(sample2, prob)\n                elif random.random() < prob:\n                    if func_name == \"albumentations\" and getattr(self, \"albumentations\", None) is None:\n                        self.albumentations = Albumentations(size=self.img_size, **_trans)\n                    sample2 = getattr(self, func_name)(sample2, **_trans)\n\n        assert isinstance(sample['segments'], np.ndarray), \\\n            f\"MixUp: sample segments type expect numpy.ndarray, but got {type(sample['segments'])}; \" \\\n            f\"maybe you should resample_segments before that.\"\n        assert isinstance(sample2['segments'], np.ndarray), \\\n            f\"MixUp: sample2 segments type expect numpy.ndarray, but got {type(sample2['segments'])}; \" \\\n            f\"maybe you should add resample_segments in pre_transform.\"\n\n        image, image2 = sample['img'], sample2['img']\n        r = np.random.beta(alpha, beta)  # mixup ratio, alpha=beta=8.0\n        image = (image * r + image2 * (1 - r)).astype(np.uint8)\n\n        sample['img'] = image\n        sample['cls'] = np.concatenate((sample['cls'], sample2['cls']), 0)\n        sample['bboxes'] = np.concatenate((sample['bboxes'], sample2['bboxes']), 0)\n        sample['segments'] = np.concatenate((sample['segments'], sample2['segments']), 0)\n        return sample\n\n    def pastein(self, sample, num_sample=30):\n        bbox_format = sample['bbox_format']\n        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'\n        assert not self.return_segments, \"pastein currently does not support seg data.\"\n        assert not self.return_keypoints, \"pastein currently does not support keypoint data.\"\n        sample.pop('segments', None)\n        sample.pop('keypoints', None)\n\n        image = sample['img']\n        cls = sample['cls']\n        bboxes = sample['bboxes']\n        # load sample\n        sample_labels, sample_images, sample_masks = [], [], []\n        while len(sample_labels) < num_sample:\n            sample_labels_, sample_images_, sample_masks_ = self._pastin_load_samples()\n            sample_labels += sample_labels_\n            sample_images += sample_images_\n            sample_masks += sample_masks_\n            if len(sample_labels) == 0:\n                break\n\n        # Applies image cutout 
augmentation https://arxiv.org/abs/1708.04552\n        h, w = image.shape[:2]\n\n        # create random masks\n        scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6  # image size fraction\n        for s in scales:\n            if random.random() < 0.2:\n                continue\n            mask_h = random.randint(1, int(h * s))\n            mask_w = random.randint(1, int(w * s))\n\n            # box\n            xmin = max(0, random.randint(0, w) - mask_w // 2)\n            ymin = max(0, random.randint(0, h) - mask_h // 2)\n            xmax = min(w, xmin + mask_w)\n            ymax = min(h, ymin + mask_h)\n\n            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)\n            if len(bboxes):\n                ioa = bbox_ioa(box, bboxes)  # intersection over area\n            else:\n                ioa = np.zeros(1)\n\n            if (\n                    (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin + 20) and (ymax > ymin + 20)\n            ):  # allow 30% obscuration of existing labels\n                sel_ind = random.randint(0, len(sample_labels) - 1)\n                hs, ws, cs = sample_images[sel_ind].shape\n                r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws)\n                r_w = int(ws * r_scale)\n                r_h = int(hs * r_scale)\n\n                if (r_w > 10) and (r_h > 10):\n                    r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))\n                    r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))\n                    temp_crop = image[ymin: ymin + r_h, xmin: xmin + r_w]\n                    m_ind = r_mask > 0\n                    if m_ind.astype(np.int_).sum() > 60:\n                        temp_crop[m_ind] = r_image[m_ind]\n                        box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], dtype=np.float32)\n                        if len(bboxes):\n                            cls = np.concatenate((cls, [[sample_labels[sel_ind]]]), 0)\n                            bboxes = np.concatenate((bboxes, [box]), 0)\n                        else:\n                            cls = np.array([[sample_labels[sel_ind]]])\n                            bboxes = np.array([box])\n\n                        image[ymin: ymin + r_h, xmin: xmin + r_w] = temp_crop  # Modify on the original image\n\n        sample['img'] = image\n        sample['bboxes'] = bboxes\n        sample['cls'] = cls\n        return sample\n\n    def _pastin_load_samples(self):\n        # loads images in a 4-mosaic\n        classes4, bboxes4, segments4 = [], [], []\n        mosaic_samples = []\n        indices = random.choices(self.indices, k=4)  # 3 additional image indices\n        mosaic_samples += [self.get_sample(i) for i in indices]\n        s = self.img_size\n        mosaic_border = [-s // 2, -s // 2]\n        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y\n\n        for i, sample in enumerate(mosaic_samples):\n            # Load image\n            img = sample['img']\n            (h, w) = img.shape[:2]\n\n            # place img in img4\n            if i == 0:  # top left\n                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles\n                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)\n                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)\n            elif i == 1:  # top right\n          
      x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc\n                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h\n            elif i == 2:  # bottom left\n                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)\n                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)\n            elif i == 3:  # bottom right\n                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)\n                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)\n\n            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]\n            padw = x1a - x1b\n            padh = y1a - y1b\n\n            # Labels\n            cls, bboxes = sample['cls'], sample['bboxes']\n            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format\n\n            classes4.append(cls)\n            bboxes4.append(bboxes)\n\n            segments = sample['segments']\n            segments_is_list = isinstance(segments, list)\n            if segments_is_list:\n                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]\n                segments4.extend(segments)\n            else:\n                segments = xyn2xy(segments, w, h, padw, padh)\n                segments4.append(segments)\n\n        # Concat/clip labels\n        classes4 = np.concatenate(classes4, 0)\n        bboxes4 = np.concatenate(bboxes4, 0)\n        bboxes4 = bboxes4.clip(0, 2 * s)\n\n        if segments_is_list:\n            for x in segments4:\n                np.clip(x, 0, 2 * s, out=x)\n        else:\n            segments4 = np.concatenate(segments4, 0)\n            segments4 = segments4.clip(0, 2 * s)\n\n        # Augment\n        sample_labels, sample_images, sample_masks = \\\n            self._pastin_sample_segments(img4, classes4, bboxes4, segments4, probability=0.5)\n\n        return sample_labels, sample_images, sample_masks\n\n    def _pastin_sample_segments(self, img, classes, bboxes, segments, probability=0.5):\n        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)\n        n = len(segments)\n        sample_labels = []\n        sample_images = []\n        sample_masks = []\n        if probability and n:\n            h, w, c = img.shape  # height, width, channels\n            for j in random.sample(range(n), k=round(probability * n)):\n                cls, l, s = classes[j], bboxes[j], segments[j]\n                box = (\n                    l[0].astype(int).clip(0, w - 1),\n                    l[1].astype(int).clip(0, h - 1),\n                    l[2].astype(int).clip(0, w - 1),\n                    l[3].astype(int).clip(0, h - 1),\n                )\n\n                if (box[2] <= box[0]) or (box[3] <= box[1]):\n                    continue\n\n                sample_labels.append(cls[0])\n\n                mask = np.zeros(img.shape, np.uint8)\n\n                cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)\n                sample_masks.append(mask[box[1]: box[3], box[0]: box[2], :])\n\n                result = cv2.bitwise_and(src1=img, src2=mask)\n                i = result > 0  # pixels to replace\n                mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug\n                sample_images.append(mask[box[1]: box[3], box[0]: box[2], :])\n\n        return sample_labels, sample_images, sample_masks\n\n    def hsv_augment(self, sample, hgain=0.5, 
sgain=0.5, vgain=0.5):\n        image = sample['img']\n        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains\n        hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))\n        dtype = image.dtype  # uint8\n\n        x = np.arange(0, 256, dtype=np.int16)\n        lut_hue = ((x * r[0]) % 180).astype(dtype)\n        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)\n        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)\n\n        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)\n        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)  # Modify on the original image\n\n        sample['img'] = image\n        return sample\n\n    def fliplr(self, sample):\n        # flip image left-right\n        image = sample['img']\n        image = np.fliplr(image)\n        sample['img'] = image\n\n        # flip box\n        _, w = image.shape[:2]\n        bboxes, bbox_format = sample['bboxes'], sample['bbox_format']\n        if bbox_format == \"ltrb\":\n            if len(bboxes):\n                x1 = bboxes[:, 0].copy()\n                x2 = bboxes[:, 2].copy()\n                bboxes[:, 0] = w - x2\n                bboxes[:, 2] = w - x1\n        elif bbox_format == \"xywhn\":\n            if len(bboxes):\n                bboxes[:, 0] = 1 - bboxes[:, 0]\n        else:\n            raise NotImplementedError\n        sample['bboxes'] = bboxes\n\n        # flip seg\n        if self.return_segments:\n            segment_format, segments = sample['segment_format'], sample['segments']\n            assert segment_format == 'polygon', \\\n                f'FlipLR: The segment format should be polygon, but got {segment_format}'\n            assert isinstance(segments, np.ndarray), \\\n                f\"FlipLR: segments type expect numpy.ndarray, but got {type(segments)}; \" \\\n                f\"maybe you should resample_segments before that.\"\n\n            if len(segments):\n                segments[..., 0] = w - segments[..., 0]\n\n            sample['segments'] = segments\n\n        return sample\n\n    def letterbox(self, sample, new_shape=None, xywhn2xyxy_=True, scaleup=False, only_image=False, color=(114, 114, 114)):\n        # Resize and pad image while meeting stride-multiple constraints\n        if sample['bbox_format'] == 'ltrb':\n            xywhn2xyxy_ = False\n\n        if not new_shape:\n            new_shape = self.img_size\n\n        if isinstance(new_shape, int):\n            new_shape = (new_shape, new_shape)\n\n        image = sample['img']\n        shape = image.shape[:2]  # current shape [height, width]\n\n        h, w = shape[:]\n        ori_shape = sample['ori_shape']\n        h0, w0 = ori_shape\n        hw_scale = np.array([h / h0, w / w0])\n        sample['hw_scale'] = hw_scale\n\n        # Scale ratio (new / old)\n        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n        if not scaleup:  # only scale down, do not scale up (for better test mAP)\n            r = min(r, 1.0)\n\n        # Compute padding\n        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n\n        dw /= 2  # divide padding into 2 sides\n        dh /= 2\n        hw_pad = np.array([dh, dw])\n\n        if shape != new_shape:\n            if shape[::-1] != new_unpad:  # resize\n                image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)\n            top, bottom = 
int(round(dh - 0.1)), int(round(dh + 0.1))\n            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))\n            image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n            sample['hw_pad'] = hw_pad\n        else:\n            sample['hw_pad'] = np.array([0., 0.])\n        bboxes = sample['bboxes']\n        if not only_image:\n            # convert bboxes\n            if len(bboxes):\n                if xywhn2xyxy_:\n                    bboxes = xywhn2xyxy(bboxes, r * w, r * h, padw=dw, padh=dh)\n                else:\n                    bboxes *= r\n                    bboxes[:, [0, 2]] += dw\n                    bboxes[:, [1, 3]] += dh\n                sample['bboxes'] = bboxes\n            sample['bbox_format'] = 'ltrb'\n\n            # convert segments\n            if 'segments' in sample:\n                segments, segment_format = sample['segments'], sample['segment_format']\n                assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n\n                if len(segments):\n                    if isinstance(segments, np.ndarray):\n                        if xywhn2xyxy_:\n                            segments[..., 0] *= w\n                            segments[..., 1] *= h\n                        else:\n                            segments *= r\n                        segments[..., 0] += dw\n                        segments[..., 1] += dh\n                    elif isinstance(segments, list):\n                        for segment in segments:\n                            if xywhn2xyxy_:\n                                segment[..., 0] *= w\n                                segment[..., 1] *= h\n                            else:\n                                segment *= r\n                            segment[..., 0] += dw\n                            segment[..., 1] += dh\n                    sample['segments'] = segments\n\n        sample['img'] = image\n        return sample\n\n    def label_norm(self, sample, xyxy2xywh_=True):\n        bbox_format = sample['bbox_format']\n        if bbox_format == \"xywhn\":\n            return sample\n\n        bboxes = sample['bboxes']\n        if len(bboxes) == 0:\n            sample['bbox_format'] = 'xywhn'\n            return sample\n\n        if xyxy2xywh_:\n            bboxes = xyxy2xywh(bboxes)  # convert xyxy to xywh\n        height, width = sample['img'].shape[:2]\n        bboxes[:, [1, 3]] /= height  # normalized height 0-1\n        bboxes[:, [0, 2]] /= width  # normalized width 0-1\n        sample['bboxes'] = bboxes\n        sample['bbox_format'] = 'xywhn'\n\n        return sample\n\n    def label_pad(self, sample, padding_size=160, padding_value=-1):\n        # create fixed label, avoid dynamic shape problem.\n        bbox_format = sample['bbox_format']\n        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'\n\n        cls, bboxes = sample['cls'], sample['bboxes']\n        cls_pad = np.full((padding_size, 1), padding_value, dtype=np.float32)\n        bboxes_pad = np.full((padding_size, 4), padding_value, dtype=np.float32)\n        nL = len(bboxes)\n        if nL:\n            cls_pad[:min(nL, padding_size)] = cls[:min(nL, padding_size)]\n            bboxes_pad[:min(nL, padding_size)] = bboxes[:min(nL, padding_size)]\n        sample['cls'] = cls_pad\n        sample['bboxes'] = bboxes_pad\n\n        if \"segments\" in sample:\n            if sample['segment_format'] == 
\"mask\":\n                segments = sample['segments']\n                assert isinstance(segments, np.ndarray), \\\n                    f\"Label Pad: segments type expect numpy.ndarray, but got {type(segments)}; \" \\\n                    f\"maybe you should resample_segments before that.\"\n                assert nL == segments.shape[0], f\"Label Pad: segments len not equal bboxes\"\n                h, w = segments.shape[1:]\n                segments_pad = np.full((padding_size, h, w), padding_value, dtype=np.float32)\n                segments_pad[:min(nL, padding_size)] = segments[:min(nL, padding_size)]\n                sample['segments'] = segments_pad\n\n        return sample\n\n    def image_norm(self, sample, scale=255.0):\n        image = sample['img']\n        image = image.astype(np.float32, copy=False)\n        image /= scale\n        sample['img'] = image\n        return sample\n\n    def image_transpose(self, sample, bgr2rgb=True, hwc2chw=True):\n        image = sample['img']\n        if bgr2rgb:\n            image = image[:, :, ::-1]\n        if hwc2chw:\n            image = image.transpose(2, 0, 1)\n        sample['img'] = image\n        return sample\n\n    def segment_poly2mask(self, sample, mask_overlap, mask_ratio):\n\"\"\"convert polygon points to bitmap.\"\"\"\n        segments, segment_format = sample['segments'], sample['segment_format']\n        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n        assert isinstance(segments, np.ndarray), \\\n            f\"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; \" \\\n            f\"maybe you should resample_segments before that.\"\n\n        h, w = sample['img'].shape[:2]\n        if mask_overlap:\n            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)\n            sample['cls'] = sample['cls'][sorted_idx]\n            sample['bboxes'] = sample['bboxes'][sorted_idx]\n            sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)\n            sample['segment_format'] = 'overlap'\n        else:\n            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)\n            sample['segments'] = masks\n            sample['segment_format'] = 'mask'\n\n        return sample\n\n    def _img2label_paths(self, img_paths):\n        # Define label paths as a function of image paths\n        sa, sb = os.sep + \"images\" + os.sep, os.sep + \"labels\" + os.sep  # /images/, /labels/ substrings\n        return [\"txt\".join(x.replace(sa, sb, 1).rsplit(x.split(\".\")[-1], 1)) for x in img_paths]\n\n    def _get_hash(self, paths):\n        # Returns a single hash value of a list of paths (files or dirs)\n        size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes\n        h = hashlib.md5(str(size).encode())  # hash sizes\n        h.update(\"\".join(paths).encode())  # hash paths\n        return h.hexdigest()  # return hash\n\n    def _exif_size(self, img):\n        # Returns exif-corrected PIL size\n        s = img.size  # (width, height)\n        try:\n            rotation = dict(img._getexif().items())[orientation]\n            if rotation == 6:  # rotation 270\n                s = (s[1], s[0])\n            elif rotation == 8:  # rotation 90\n                s = (s[1], s[0])\n        except:\n            pass\n\n        return s\n\n    def train_collate_fn(self, batch_samples, batch_info):\n        imgs = [sample.pop('img') for sample in 
batch_samples]\n        labels = []\n        for i, sample in enumerate(batch_samples):\n            cls, bboxes = sample.pop('cls'), sample.pop('bboxes')\n            labels.append(np.concatenate((np.full_like(cls, i), cls, bboxes), axis=-1))\n        return_items = [np.stack(imgs, 0), np.stack(labels, 0)]\n\n        if self.return_segments:\n            masks = [sample.pop('segments', None) for sample in batch_samples]\n            return_items.append(np.stack(masks, 0))\n        if self.return_keypoints:\n            keypoints = [sample.pop('keypoints', None) for sample in batch_samples]\n            return_items.append(np.stack(keypoints, 0))\n\n        return tuple(return_items)\n\n    def test_collate_fn(self, batch_samples, batch_info):\n        imgs = [sample.pop('img') for sample in batch_samples]\n        path = [sample.pop('im_file') for sample in batch_samples]\n        hw_ori = [sample.pop('ori_shape') for sample in batch_samples]\n        hw_scale = [sample.pop('hw_scale') for sample in batch_samples]\n        pad = [sample.pop('hw_pad') for sample in batch_samples]\n        return (\n            np.stack(imgs, 0),\n            path,\n            np.stack(hw_ori, 0),\n            np.stack(hw_scale, 0),\n            np.stack(pad, 0),\n        )\n
"},{"location":"reference/data/#mindyolo.data.dataset.COCODataset.get_sample","title":"mindyolo.data.dataset.COCODataset.get_sample(index)","text":"

Get and return label information from the dataset.

Source code in mindyolo/data/dataset.py
def get_sample(self, index):\n\"\"\"Get and return label information from the dataset.\"\"\"\n    sample = deepcopy(self.labels[index])\n    if self.imgs is None:\n        path = self.img_files[index]\n        img = cv2.imread(path)  # BGR\n        assert img is not None, \"Image Not Found \" + path\n        h_ori, w_ori = img.shape[:2]  # orig hw\n        r = self.img_size / max(h_ori, w_ori)  # resize image to img_size\n        if r != 1:  # always resize down, only resize up if training with augmentation\n            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR\n            img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)\n\n        sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original\n\n    else:\n        sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original\n\n    return sample\n
"},{"location":"reference/data/#mindyolo.data.dataset.COCODataset.segment_poly2mask","title":"mindyolo.data.dataset.COCODataset.segment_poly2mask(sample, mask_overlap, mask_ratio)","text":"

convert polygon points to bitmap.

Source code in mindyolo/data/dataset.py
def segment_poly2mask(self, sample, mask_overlap, mask_ratio):\n\"\"\"convert polygon points to bitmap.\"\"\"\n    segments, segment_format = sample['segments'], sample['segment_format']\n    assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'\n    assert isinstance(segments, np.ndarray), \\\n        f\"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; \" \\\n        f\"maybe you should resample_segments before that.\"\n\n    h, w = sample['img'].shape[:2]\n    if mask_overlap:\n        masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)\n        sample['cls'] = sample['cls'][sorted_idx]\n        sample['bboxes'] = sample['bboxes'][sorted_idx]\n        sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)\n        sample['segment_format'] = 'overlap'\n    else:\n        masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)\n        sample['segments'] = masks\n        sample['segment_format'] = 'mask'\n\n    return sample\n
"},{"location":"reference/data/#albumentations","title":"Albumentations","text":""},{"location":"reference/data/#mindyolo.data.albumentations.Albumentations","title":"mindyolo.data.albumentations.Albumentations","text":"Source code in mindyolo/data/albumentations.py
class Albumentations:\n    # Implement Albumentations augmentation https://github.com/ultralytics/yolov5\n    # YOLOv5 Albumentations class (optional, only used if package is installed)\n    def __init__(self, size=640, random_resized_crop=True, **kwargs):\n        self.transform = None\n        prefix = _colorstr(\"albumentations: \")\n        try:\n            import albumentations as A\n\n            _check_version(A.__version__, \"1.0.3\", hard=True)  # version requirement\n            T = []\n            if random_resized_crop:\n                T.extend([\n                    A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),\n                ])\n            T.extend([\n                A.Blur(p=0.01),\n                A.MedianBlur(p=0.01),\n                A.ToGray(p=0.01),\n                A.CLAHE(p=0.01),\n                A.RandomBrightnessContrast(p=0.0),\n                A.RandomGamma(p=0.0),\n                A.ImageCompression(quality_lower=75, p=0.0),\n            ])\n            self.transform = A.Compose(T, bbox_params=A.BboxParams(format=\"yolo\", label_fields=[\"class_labels\"]))\n\n            print(prefix + \", \".join(f\"{x}\".replace(\"always_apply=False, \", \"\") for x in T if x.p), flush=True)\n            print(\"[INFO] albumentations load success\", flush=True)\n        except ImportError:  # package not installed, skip\n            pass\n            print(\"[WARNING] package not installed, albumentations load failed\", flush=True)\n        except Exception as e:\n            print(f\"{prefix}{e}\", flush=True)\n            print(\"[WARNING] albumentations load failed\", flush=True)\n\n    def __call__(self, sample, p=1.0, **kwargs):\n        if self.transform and random.random() < p:\n            im, bboxes, cls, bbox_format = sample['img'], sample['bboxes'], sample['cls'], sample['bbox_format']\n            assert bbox_format in (\"ltrb\", \"xywhn\")\n            if bbox_format == \"ltrb\" and bboxes.shape[0] > 0:\n                h, w = im.shape[:2]\n                bboxes = xyxy2xywh(bboxes)\n                bboxes[:, [0, 2]] /= w\n                bboxes[:, [1, 3]] /= h\n\n            new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed\n\n            sample['img'] = new['image']\n            sample['bboxes'] = np.array(new['bboxes'])\n            sample['cls'] = np.array(new['class_labels'])\n            sample['bbox_format'] = \"xywhn\"\n\n        return sample\n
"},{"location":"reference/models/","title":"Models","text":""},{"location":"reference/models/#create-model","title":"Create Model","text":""},{"location":"reference/models/#mindyolo.models.model_factory.create_model","title":"mindyolo.models.model_factory.create_model(model_name, model_cfg=None, in_channels=3, num_classes=80, checkpoint_path='', **kwargs)","text":"Source code in mindyolo/models/model_factory.py
def create_model(\n    model_name: str,\n    model_cfg: dict = None,\n    in_channels: int = 3,\n    num_classes: int = 80,\n    checkpoint_path: str = \"\",\n    **kwargs,\n):\n    model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels)\n    kwargs = {k: v for k, v in kwargs.items() if v is not None}\n\n    if not is_model(model_name):\n        raise RuntimeError(f\"Unknown model {model_name}\")\n\n    create_fn = model_entrypoint(model_name)\n    model = create_fn(**model_args, **kwargs)\n\n    if checkpoint_path:\n        assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith(\n            \".ckpt\"\n        ), f\"[{checkpoint_path}] not a ckpt file.\"\n        checkpoint_param = load_checkpoint(checkpoint_path)\n        load_param_into_net(model, checkpoint_param)\n        logger.info(f\"Load checkpoint from [{checkpoint_path}] success.\")\n\n    return model\n
"},{"location":"reference/models/#yolov3","title":"YOLOV3","text":""},{"location":"reference/models/#mindyolo.models.yolov3","title":"mindyolo.models.yolov3(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolov3 model.

Source code in mindyolo/models/yolov3.py
@register_model\ndef yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3:\n\"\"\"Get yolov3 model.\"\"\"\n    model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"reference/models/#yolov4","title":"YOLOV4","text":""},{"location":"reference/models/#mindyolo.models.yolov4","title":"mindyolo.models.yolov4(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolov4 model.

Source code in mindyolo/models/yolov4.py
@register_model\ndef yolov4(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv4:\n\"\"\"Get yolov4 model.\"\"\"\n    model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"reference/models/#yolov5","title":"YOLOV5","text":""},{"location":"reference/models/#mindyolo.models.yolov5","title":"mindyolo.models.yolov5(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolov5 model.

Source code in mindyolo/models/yolov5.py
@register_model\ndef yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5:\n\"\"\"Get yolov5 model.\"\"\"\n    model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"reference/models/#yolov7","title":"YOLOV7","text":""},{"location":"reference/models/#mindyolo.models.yolov7","title":"mindyolo.models.yolov7(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolov7 model.

Source code in mindyolo/models/yolov7.py
@register_model\ndef yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7:\n\"\"\"Get yolov7 model.\"\"\"\n    model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"reference/models/#yolov8","title":"YOLOV8","text":""},{"location":"reference/models/#mindyolo.models.yolov8","title":"mindyolo.models.yolov8(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolov8 model.

Source code in mindyolo/models/yolov8.py
@register_model\ndef yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8:\n\"\"\"Get yolov8 model.\"\"\"\n    model = YOLOv8(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"reference/models/#yolox","title":"YOLOX","text":""},{"location":"reference/models/#mindyolo.models.yolox","title":"mindyolo.models.yolox(cfg, in_channels=3, num_classes=None, **kwargs)","text":"

Get yolox model.

Source code in mindyolo/models/yolox.py
@register_model\ndef yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX:\n\"\"\"Get yolox model.\"\"\"\n    model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
"},{"location":"tutorials/configuration/","title":"Configuration","text":"

MindYOLO supports parsing parameters from both yaml files and the command line. Parameters that are fixed, complex, closely tied to the model, or nested are placed in yaml files, while simpler parameters, or those that vary from run to run, can be passed on the command line.

The following takes yolov3 as an example to explain how to configure the corresponding parameters.

"},{"location":"tutorials/configuration/#parameter-inheritance-relationship","title":"Parameter Inheritance Relationship","text":"

The parameter priority, from high to low, is listed below. When parameters with the same name appear in several sources, the higher-priority value overrides the lower-priority one (a toy sketch of this override order follows the list below).

  • Parameters inputted with user command lines
  • Default parameters set in parser from .py files
  • Parameters in yaml files specified by user command lines
  • Parameters in the yaml files listed under __BASE__ in the yaml file specified on the command line. Taking yolov3 as an example, it contains:
    __BASE__: [\n'../coco.yaml',\n'./hyp.scratch.yaml',\n]\n
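
As a toy illustration of this override order, the sketch below merges three of the sources in increasing priority. It is not MindYOLO's actual config loader, just a picture of the rule.

# Illustrative only: not MindYOLO's actual config loader.\ndef merge(*dicts):\n    # later dicts override earlier ones, i.e. higher priority last\n    out = {}\n    for d in dicts:\n        out.update(d)\n    return out\n\nbase_yaml = {'per_batch_size': 16, 'epochs': 300}   # from __BASE__ files\nconfig_yaml = {'per_batch_size': 32}                # yaml passed via --config\ncli_args = {'epochs': 500}                          # user command line\n\ncfg = merge(base_yaml, config_yaml, cli_args)\nprint(cfg)  # {'per_batch_size': 32, 'epochs': 500}\n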
"},{"location":"tutorials/configuration/#basic-parameters","title":"Basic Parameters","text":""},{"location":"tutorials/configuration/#parameter-description","title":"Parameter Description","text":"
  • device_target: device used, Ascend/GPU/CPU
  • save_dir: the path to save the running results, the default is ./runs
  • log_interval: step interval to print logs, the default is 100
  • is_parallel: whether to perform distributed training, the default is False
  • ms_mode: whether to use static graph mode (0) or dynamic graph mode (1), the default is 0.
  • config: yaml configuration file path
  • per_batch_size: batch size of each card, default is 32
  • epochs: number of training epochs, default is 300
  • ...
"},{"location":"tutorials/configuration/#parse-parameter-settings","title":"Parse parameter settings","text":"

These parameters are usually passed in on the command line. For example:

mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True --log_interval 50\n
"},{"location":"tutorials/configuration/#dataset","title":"Dataset","text":""},{"location":"tutorials/configuration/#parameter-description_1","title":"Parameter Description","text":"
  • dataset_name: dataset name
  • train_set: the path where the training set is located
  • val_set: the path where the verification set is located
  • test_set: the path where the test set is located
  • nc: number of categories in the data set
  • names: category names
  • ...
"},{"location":"tutorials/configuration/#yaml-file-sample","title":"Yaml file sample","text":"

These parameters are defined in configs/coco.yaml; the dataset paths usually need to be modified to match your environment.

data:\ndataset_name: coco\n\ntrain_set: ./coco/train2017.txt  # 118287 images\nval_set: ./coco/val2017.txt  # 5000 images\ntest_set: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794\n\nnc: 80\n\n# class names\nnames: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',\n'hair drier', 'toothbrush' ]\n
"},{"location":"tutorials/configuration/#data-augmentation","title":"Data Augmentation","text":""},{"location":"tutorials/configuration/#parameter-description_2","title":"Parameter Description","text":"
  • num_parallel_workers: number of worker processes reading data
  • train_transforms: data augmentation operations applied during training
  • test_transforms: data augmentation operations applied during evaluation
  • ...
"},{"location":"tutorials/configuration/#yaml-file-sample_1","title":"Yaml file sample","text":"

These parameters are defined in configs/yolov3/hyp.scratch.yaml. Both train_transforms and test_transforms are lists of dictionaries; each dictionary gives the name of a data augmentation operation, its probability of being applied, and the parameters of that operation.

data:\nnum_parallel_workers: 4\n\ntrain_transforms:\n- { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.9 }\n- { func_name: mixup, prob: 0.1, alpha: 8.0, beta: 8.0, needed_mosaic: True }\n- { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }\n- { func_name: label_norm, xyxy2xywh_: True }\n- { func_name: albumentations }\n- { func_name: fliplr, prob: 0.5 }\n- { func_name: label_pad, padding_size: 160, padding_value: -1 }\n- { func_name: image_norm, scale: 255. }\n- { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }\n\ntest_transforms:\n- { func_name: letterbox, scaleup: False }\n- { func_name: label_norm, xyxy2xywh_: True }\n- { func_name: label_pad, padding_size: 160, padding_value: -1 }\n- { func_name: image_norm, scale: 255. }\n- { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }\n
"},{"location":"tutorials/configuration/#model","title":"Model","text":""},{"location":"tutorials/configuration/#parameter-description_3","title":"Parameter Description","text":"
  • model_name: model name
  • depth_multiple: model depth factor
  • width_multiple: model width factor
  • stride: feature map downsampling multiple
  • anchors: default anchor box
  • backbone: model backbone network
  • head: model detection head
"},{"location":"tutorials/configuration/#yaml-file-sample_2","title":"Yaml file sample","text":"

These parameters are defined in configs/yolov3/yolov3.yaml. The network is constructed from the backbone and head parameters, which are written as a nested list: each line describes one layer module with four entries, namely the index of the input layer (-1 means the previous layer), the number of times the module is repeated, the module name, and the module's arguments. Users can also define and register networks directly in Python files without using yaml files; a minimal sketch of this follows the yaml sample below.

network:\nmodel_name: yolov3\n\ndepth_multiple: 1.0  # model depth multiple\nwidth_multiple: 1.0  # layer channel multiple\nstride: [8, 16, 32]\nanchors:\n- [10,13, 16,30, 33,23]  # P3/8\n- [30,61, 62,45, 59,119]  # P4/16\n- [116,90, 156,198, 373,326]  # P5/32\n\n# darknet53 backbone\nbackbone:\n# [from, number, module, args]\n[[-1, 1, ConvNormAct, [32, 3, 1]],  # 0\n[-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2\n[-1, 1, Bottleneck, [64]],\n[-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4\n[-1, 2, Bottleneck, [128]],\n[-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8\n[-1, 8, Bottleneck, [256]],\n[-1, 1, ConvNormAct, [512, 3, 2]],  # 7-P4/16\n[-1, 8, Bottleneck, [512]],\n[-1, 1, ConvNormAct, [1024, 3, 2]],  # 9-P5/32\n[-1, 4, Bottleneck, [1024]],  # 10\n]\n\n# YOLOv3 head\nhead:\n[[-1, 1, Bottleneck, [1024, False]],\n[-1, 1, ConvNormAct, [512, 1, 1]],\n[-1, 1, ConvNormAct, [1024, 3, 1]],\n[-1, 1, ConvNormAct, [512, 1, 1]],\n[-1, 1, ConvNormAct, [1024, 3, 1]],  # 15 (P5/32-large)\n\n[-2, 1, ConvNormAct, [256, 1, 1]],\n[-1, 1, Upsample, [None, 2, 'nearest']],\n[[-1, 8], 1, Concat, [1]],  # cat backbone P4\n[-1, 1, Bottleneck, [512, False]],\n[-1, 1, Bottleneck, [512, False]],\n[-1, 1, ConvNormAct, [256, 1, 1]],\n[-1, 1, ConvNormAct, [512, 3, 1]],  # 22 (P4/16-medium)\n\n[-2, 1, ConvNormAct, [128, 1, 1]],\n[-1, 1, Upsample, [None, 2, 'nearest']],\n[[-1, 6], 1, Concat, [1]],  # cat backbone P3\n[-1, 1, Bottleneck, [256, False]],\n[-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)\n\n[[27, 22, 15], 1, YOLOv3Head, [nc, anchors, stride]],   # Detect(P3, P4, P5)\n]\n
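
For the Python route mentioned above, the following is a hedged sketch: it assumes register_model can be imported from mindyolo.models.registry (the decorator used by the built-in models in the reference section) and uses a deliberately tiny toy network only to show the registration and creation flow.

import mindspore.nn as nn\nfrom mindyolo.models.registry import register_model\nfrom mindyolo.models.model_factory import create_model\n\nclass TinyDetector(nn.Cell):\n    # toy network, only to illustrate registration\n    def __init__(self, in_channels=3, num_classes=80):\n        super().__init__()\n        self.stem = nn.SequentialCell([nn.Conv2d(in_channels, 32, 3, 2), nn.BatchNorm2d(32), nn.ReLU()])\n        self.head = nn.Conv2d(32, num_classes, 1)\n\n    def construct(self, x):\n        return self.head(self.stem(x))\n\n@register_model\ndef tiny_detector(cfg=None, in_channels=3, num_classes=80, **kwargs):\n    # entry point signature mirrors the built-in models above; cfg is unused here\n    return TinyDetector(in_channels=in_channels, num_classes=num_classes)\n\nmodel = create_model('tiny_detector', num_classes=80)\n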

"},{"location":"tutorials/configuration/#loss-function","title":"Loss function","text":""},{"location":"tutorials/configuration/#parameter-description_4","title":"Parameter Description","text":"
  • name: loss function name
  • box: box loss weight
  • cls: class loss weight
  • cls_pw: class loss positive sample weight
  • obj: object loss weight
  • obj_pw: object loss positive sample weight
  • fl_gamma: focal loss gamma
  • anchor_t: anchor shape proportion threshold
  • label_smoothing: label smoothing value
"},{"location":"tutorials/configuration/#yaml-file-sample_3","title":"Yaml file sample","text":"

These parameters are defined in configs/yolov3/hyp.scratch.yaml.

loss:\nname: YOLOv7Loss\nbox: 0.05  # box loss gain\ncls: 0.5  # cls loss gain\ncls_pw: 1.0  # cls BCELoss positive_weight\nobj: 1.0  # obj loss gain (scale with pixels)\nobj_pw: 1.0  # obj BCELoss positive_weight\nfl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)\nanchor_t: 4.0  # anchor-multiple threshold\nlabel_smoothing: 0.0 # label smoothing epsilon\n
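
The box/obj/cls gains above weight the three loss terms. Schematically, the combination looks like the function below; this is a generic YOLO-style weighted sum shown for orientation, not the exact YOLOv7Loss implementation.

# Schematic weighted sum of the three loss terms; not the exact YOLOv7Loss code.\ndef combine_losses(loss_box, loss_obj, loss_cls, box=0.05, obj=1.0, cls=0.5):\n    return box * loss_box + obj * loss_obj + cls * loss_cls\n\nprint(combine_losses(1.0, 1.0, 1.0))  # 1.55\n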
"},{"location":"tutorials/configuration/#optimizer","title":"Optimizer","text":""},{"location":"tutorials/configuration/#parameter-description_5","title":"Parameter Description","text":"
  • optimizer: optimizer name.
  • lr_init: initial value of learning rate
  • warmup_epochs: number of warmup epochs
  • warmup_momentum: initial value of warmup momentum
  • warmup_bias_lr: initial value of warmup bias learning rate
  • min_warmup_step: minimum number of warmup steps
  • group_param: parameter grouping strategy
  • gp_weight_decay: Group parameter weight decay coefficient
  • start_factor: initial learning rate factor
  • end_factor: end learning rate factor
  • momentum: momentum of the moving average
  • loss_scale: loss scaling coefficient
  • nesterov: Whether to use the Nesterov Accelerated Gradient (NAG) algorithm to update the gradient.
"},{"location":"tutorials/configuration/#yaml-file-sample_4","title":"Yaml file sample","text":"

These parameters are defined in configs/yolov3/hyp.scratch.yaml. In the following example, the learning rate right after the warmup stage is lr_init * start_factor = 0.01 * 1.0 = 0.01, and the final learning rate is lr_init * end_factor = 0.01 * 0.01 = 0.0001.

optimizer:\noptimizer: momentum\nlr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)\nmomentum: 0.937  # SGD momentum/Adam beta1\nnesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm\nloss_scale: 1.0 # loss scale for optimizer\nwarmup_epochs: 3  # warmup epochs (fractions ok)\nwarmup_momentum: 0.8  # warmup initial momentum\nwarmup_bias_lr: 0.1  # warmup initial bias lr\nmin_warmup_step: 1000 # minimum warmup step\ngroup_param: yolov7 # group param strategy\ngp_weight_decay: 0.0005  # group param weight decay 5e-4\nstart_factor: 1.0\nend_factor: 0.01\n
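
The arithmetic above can be checked with a short stand-alone sketch of the resulting linear decay; this is a plain reimplementation for illustration, not MindYOLO's scheduler code.

# Plain sketch of the linear decay described above (not MindYOLO's scheduler).\nlr_init, start_factor, end_factor = 0.01, 1.0, 0.01\nepochs = 300\n\nlr_start = lr_init * start_factor  # 0.01, learning rate right after warmup\nlr_end = lr_init * end_factor      # 0.0001, learning rate at the last epoch\n\ndef lr_at(epoch):\n    # linear interpolation between lr_start and lr_end over the training epochs\n    t = epoch / (epochs - 1)\n    return lr_start * (1 - t) + lr_end * t\n\nprint(lr_at(0), lr_at(epochs - 1))  # 0.01 0.0001\n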
"},{"location":"tutorials/data_augmentation/","title":"Data Augmentation","text":""},{"location":"tutorials/data_augmentation/#list-of-data-enhancement-methods-that-come-with-the-package","title":"List of data enhancement methods that come with the package","text":"Data Enhancement Method Name Summary Explanation mosaic randomly select mosaic4 and mosaic9 mosaic4 4-part splicing mosaic9 9-point splicing mixup linearly mix two images pastein clipping enhancement random_perspective random perspective transformation hsv_augment random color transformation fliplr flip horizontally flipud vertical flip letterbox scale and fill label_norm label normalization and coordinates normalized to 0-1 to range label_pad fill label information into a fixed-size array image_norm image data normalization image_transpose channel transpose and dimension transpose albumentations albumentations data enhancement

These data augmentation functions are defined in mindyolo/data/dataset.py.

"},{"location":"tutorials/data_augmentation/#instructions","title":"Instructions","text":"

MindYOLO data augmentation is configured in the yaml file. For example, to add data augmentation to the training process, add a list of dictionaries under the data.train_transforms field of the yaml file; the augmentation methods are applied in order from top to bottom.

A typical augmentation configuration dictionary must contain func_name, the name of the augmentation method to apply, followed by the parameters to set for that method. If a parameter is not given in the dictionary, the method's default value is used.

The general form of an augmentation configuration dictionary:

- {func_name: data enhancement method name 1, args11=x11, args12=x12, ..., args1n=x1n}\n- {func_name: data enhancement method name 2, args21=x21, args22=x22, ..., args2n=x2n}\n...\n- {func_name: data enhancement method name n, argsn1=xn1, argsn2=xn2, ..., argsnn=xnn}\n

Example: YOLOv7 training data augmentation:

#File directory: configs/yolov7/hyp.scratch.tiny.yaml (https://github.com/mindspore-lab/mindyolo/blob/master/configs/yolov7/hyp.scratch.tiny.yaml)\ntrain_transforms:\n- {func_name: mosaic, prob: 1.0, mosaic9_prob: 0.2, translate: 0.1, scale: 0.5}\n- {func_name: mixup, prob: 0.05, alpha: 8.0, beta: 8.0, needed_mosaic: True}\n- {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}\n- {func_name: pastein, prob: 0.05, num_sample: 30}\n- {func_name: label_norm, xyxy2xywh_: True}\n- {func_name: fliplr, prob: 0.5}\n- {func_name: label_pad, padding_size: 160, padding_value: -1}\n- {func_name: image_norm, scale: 255.}\n- {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}\n
Note: func_name is the name of the data augmentation method, while prob, mosaic9_prob, translate, and scale are its parameters. Among them, prob is common to all methods and gives the probability that the augmentation is executed; its default value is 1.
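
Under the hood, each dictionary is dispatched by its func_name onto a method of the dataset and gated by prob, in the same spirit as the pre_transform handling inside mixup in the reference section. A simplified sketch of that dispatch loop (illustrative only; the real pipeline lives in mindyolo/data/dataset.py):

import random\n\ndef apply_transforms(dataset, sample, transforms):\n    # simplified sketch: run each configured transform on one sample, in order\n    for trans in transforms:\n        trans = dict(trans)            # keep the original config untouched\n        func_name = trans.pop('func_name')\n        prob = trans.pop('prob', 1.0)  # prob defaults to 1 as noted above\n        if random.random() < prob:\n            sample = getattr(dataset, func_name)(sample, **trans)\n    return sample\n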

The specific operations performed by the above yaml file are as follows:

  • mosaic: perform the mosaic operation on the input image with a probability of 1.0, that is, splice 4 different images into one image. mosaic9_prob is the probability of using the 9-image layout for the splicing, and translate and scale control the degree of random translation and scaling respectively.

  • mixup: perform the mixup operation with a probability of 0.05, that is, blend two different images. alpha and beta are the parameters of the Beta distribution from which the mixing ratio is drawn, and needed_mosaic indicates whether the mosaic transform should be applied when preparing the second image to mix with.

  • hsv_augment: HSV enhancement, adjust the HSV color space of the input image with a probability of 1.0 to increase data diversity. Among them, hgain, sgain and vgain represent the degree of adjustment of H, S and V channels respectively.

  • pastein: randomly paste some samples into the input image with a probability of 0.05. num_sample is the number of samples to be randomly pasted.

  • label_norm: Convert the input label from the format of (x1, y1, x2, y2) to the format of (x, y, w, h).

  • fliplr: Flip the input image horizontally with a probability of 0.5 to increase data diversity.

  • label_pad: Pad the input labels so that each image has the same number of labels. padding_size represents the number of labels after padding, and padding_value represents the value of padding.

  • image_norm: Scale the input image pixel value from the range [0, 255] to the range [0, 1].

  • image_transpose: convert the input image from BGR to RGB, and transpose its layout from HWC to CHW.

Augmentation for evaluation is declared under the test_transforms field and is configured in the same way as for training.

"},{"location":"tutorials/data_augmentation/#custom-data-enhancement","title":"Custom data enhancement","text":"

Writing Guide:

  • Add custom data enhancement methods to the COCODataset class in the mindyolo/data/dataset.py file
  • The inputs to a data augmentation method usually include the image, the labels, and any custom parameters.
  • Write the function body and customize its output.

A typical data enhancement method:

#Add submethods in mindyolo/data/dataset.py COCODataset\ndef data_trans_func(self, image, labels, args1=x1, args2=x2, ..., argsn=xn):\n    # Data enhancement logic\n    ...\n    return image, labels\n
For example, a custom data augmentation function that rotates the image:
#mindyolo/data/dataset.py\ndef rotate(self, image, labels, angle):\n    # rotate image\n    image = np.rot90(image, angle // 90)\n    if len(labels):\n        if angle == 90:\n            labels[:, 0], labels[:, 1] = 1 - labels[:, 1], labels[:, 0]\n        elif angle == 180:\n            labels[:, 0], labels[:, 1] = 1 - labels[:, 0], 1 - labels[:, 1]\n        elif angle == 270:\n            labels[:, 0], labels[:, 1] = labels[:, 1], 1 - labels[:, 0]\n    return image, labels\n

Usage: declare this data augmentation method as a dictionary in the model's yaml file, in the same way as described above:

    - {func_name: rotate, angle: 90}\n


"},{"location":"tutorials/deployment/","title":"Deployment","text":""},{"location":"tutorials/deployment/#dependencies","title":"Dependencies","text":"
pip install -r requirements.txt\n
"},{"location":"tutorials/deployment/#mindspore-lite-environment-preparation","title":"MindSpore Lite environment preparation","text":"

Reference: Lite environment configuration. Note: MindSpore Lite is adapted to Python 3.7; please prepare a Python 3.7 environment before installing Lite.

  1. Depending on the environment, download the matching tar.gz package and whl package.

  2. Unzip the tar.gz package and install the corresponding version of the whl package:

    tar -zxvf mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.tar.gz\npip install mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.whl\n

  3. Configure the Lite environment variables. LITE_HOME is the folder path extracted from the tar.gz package; an absolute path is recommended.
    export LITE_HOME=/path/to/mindspore-lite-{version}-{os}-{platform}\nexport LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH\nexport PATH=$LITE_HOME/tools/converter/converter:$LITE_HOME/tools/benchmark:$PATH\n
"},{"location":"tutorials/deployment/#quick-start","title":"Quick Start","text":""},{"location":"tutorials/deployment/#model-conversion","title":"Model conversion","text":"

Convert the ckpt model to a MindIR model; this step can be run on CPU or Ascend 910.

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target [CPU/Ascend]\ne.g.\n#Run on CPU\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\n# Run on Ascend\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target Ascend\n

"},{"location":"tutorials/deployment/#lite-test","title":"Lite Test","text":"
python deploy/test.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml\ne.g.\npython deploy/test.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml\n
"},{"location":"tutorials/deployment/#lite-predict","title":"Lite Predict","text":"
python ./deploy/predict.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_conifg/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython deploy/predict.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg\n
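
Besides deploy/predict.py, the exported MindIR can also be loaded directly with the MindSpore Lite Python API. The following is only an outline: the API calls follow the Lite 2.x Python interface, while the file name, input shape and dummy input are assumptions, and real use still needs the usual preprocessing and decode/NMS post-processing.

import numpy as np\nimport mindspore_lite as mslite\n\n# assumed file name and input resolution; adjust to your exported model\ncontext = mslite.Context()\ncontext.target = ['cpu']  # or ['ascend'] on Ascend hardware\n\nmodel = mslite.Model()\nmodel.build_from_file('yolov5n.mindir', mslite.ModelType.MINDIR, context)\n\nimg = np.zeros((1, 3, 640, 640), dtype=np.float32)  # stands in for a preprocessed image batch\ninputs = model.get_inputs()\ninputs[0].set_data_from_numpy(img)\n\noutputs = model.predict(inputs)\npred = outputs[0].get_data_to_numpy()  # raw predictions; decode/NMS follows\nprint(pred.shape)\n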
"},{"location":"tutorials/deployment/#script-description","title":"Script description","text":"
  • predict.py supports single image inference
  • test.py supports COCO data set inference
  • Note: currently only supports inference on Ascend 310
"},{"location":"tutorials/deployment/#mindx-deployment","title":"MindX Deployment","text":""},{"location":"tutorials/deployment/#environment-configuration","title":"Environment configuration","text":"

Reference: MindX environment preparation. Note: MindX currently supports Python 3.9; please prepare a Python 3.9 environment before installing MindX.

  1. Obtain the environment installation package (https://www.hiascend.com/software/mindx-sdk/commercial) from the MindX official website. Currently, version 3.0.0 of MindX infer is supported.

  2. Go to the download page and download Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run.

  3. Place the installation package in a directory on the Ascend 310 machine and unzip it

  4. If you are not a root user, you need to add executable permissions to the package:

    chmod +x Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run\n

  5. Go to the directory where the development kit package was uploaded and install the mxManufacture development kit package.
    ./Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run --install\n
    After the installation completes, if the following output appears, the software was installed successfully.
    The installation is successful\n
    After the installation is complete, the mxManufacture software directory structure is as follows:
    .\n\u251c\u2500\u2500 bin\n\u251c\u2500\u2500 config\n\u251c\u2500\u2500 filelist.txt\n\u251c\u2500\u2500 include\n\u251c\u2500\u2500 lib\n\u251c\u2500\u2500 opensource\n\u251c\u2500\u2500 operators\n\u251c\u2500\u2500 python\n\u251c\u2500\u2500 samples\n\u251c\u2500\u2500 set_env.sh\n\u251c\u2500\u2500 toolkit\n\u2514\u2500\u2500 version.info\n
  6. Enter the installation directory of mxmanufacture and run the following command to make the MindX SDK environment variables take effect.
    source set_env.sh\n
  7. Enter ./mxVision-3.0.0/python/ and install mindx-3.0.0-py3-none-any.whl
    pip install mindx-3.0.0-py3-none-any.whl\n
"},{"location":"tutorials/deployment/#model-conversion_1","title":"Model conversion","text":"
  1. Convert the ckpt model to an AIR model. This step needs to be performed on Ascend 910.

    python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format AIR\ne.g.\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format AIR\n
    yolov7 export needs to be run on an Ascend 910 machine with version 2.0 or above

  2. To convert the AIR model to an OM model, use the atc conversion tool. This step requires the MindX environment to be installed and runs on Ascend 310.

    atc --model=./path_to_air/weight.air --framework=1 --output=yolo --soc_version=Ascend310\n

"},{"location":"tutorials/deployment/#mindx-test","title":"MindX Test","text":"

Infer COCO data:

python ./deploy/test.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml\ne.g.\npython ./deploy/test.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml\n

"},{"location":"tutorials/deployment/#mindx-predict","title":"MindX Predict","text":"

Infer a single image:

python ./deploy/predict.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython ./deploy/predict.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg\n

"},{"location":"tutorials/deployment/#mindir-deployment","title":"MindIR Deployment","text":""},{"location":"tutorials/deployment/#environmental-requirements","title":"Environmental requirements","text":"

mindspore>=2.1

"},{"location":"tutorials/deployment/#precautions","title":"Precautions","text":"
  1. Currently only supports Predict

  2. Theoretically, it can also run on Ascend910, but it has not been tested.

"},{"location":"tutorials/deployment/#model-conversion_2","title":"Model conversion","text":"

Convert the ckpt model to a MindIR model. This step can be run on the CPU.

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\ne.g.\n#Run on CPU\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\n

"},{"location":"tutorials/deployment/#mindir-test","title":"MindIR Test","text":"

Coming soon

"},{"location":"tutorials/deployment/#mindir-predict","title":"MindIR Predict","text":"

Infer a single image:

python ./deploy/predict.py --model_type MindIR --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython deploy/predict.py --model_type MindIR --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg\n
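For reference, the sketch below shows what loading and running a MindIR file with MindSpore >= 2.1 roughly looks like. It is not deploy/predict.py itself: pre-processing (letterbox resize) and post-processing (NMS) of the YOLO outputs are omitted, and the file name and input shape are assumptions matching the per_batch_size 1, 640x640 export above:

import numpy as np
import mindspore as ms
from mindspore import nn

# Load the exported MindIR graph and wrap it as an executable cell
graph = ms.load("./yolov5n.mindir")
net = nn.GraphCell(graph)

# Dummy input with the shape assumed at export time
dummy = ms.Tensor(np.zeros((1, 3, 640, 640), dtype=np.float32))
out = net(dummy)
outs = out if isinstance(out, (list, tuple)) else (out,)
print([o.shape for o in outs])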

"},{"location":"tutorials/deployment/#onnx-deployment","title":"ONNX deployment","text":""},{"location":"tutorials/deployment/#environment-configuration_1","title":"Environment configuration","text":"
pip install onnx>=1.9.0\npip install onnxruntime>=1.8.0\n
"},{"location":"tutorials/deployment/#precautions_1","title":"Precautions","text":"
  1. Currently, not all MindYOLO models support ONNX export and inference (only YOLOv3 is used as an example here)

  2. Currently only supports the Predict function

  3. Exporting to ONNX requires replacing the nn.SiLU operator with an implementation based on the underlying sigmoid operator.

For example, add the following custom layer and replace all occurrences of nn.SiLU in mindyolo:

import mindspore.nn as nn\nimport mindspore.ops as ops\n\nclass EdgeSiLU(nn.Cell):\n\"\"\"\n    SiLU activation function: x * sigmoid(x). To support onnx export of nn.SiLU.\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n\n    def construct(self, x):\n        return x * ops.sigmoid(x)\n
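One way to apply the replacement without editing every model definition by hand is to swap the activation cells recursively on a constructed network before export. The helper below is an illustrative sketch (not part of MindYOLO); it assumes network is an already built MindYOLO model and that EdgeSiLU is defined as above:

import mindspore.nn as nn

def replace_silu(cell: nn.Cell):
    # Recursively replace every nn.SiLU child cell with the EdgeSiLU defined above
    for name, sub_cell in cell.name_cells().items():
        if isinstance(sub_cell, nn.SiLU):
            setattr(cell, name, EdgeSiLU())
        else:
            replace_silu(sub_cell)

# Usage sketch:
# replace_silu(network)  # call before exporting the network to ONNX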

"},{"location":"tutorials/deployment/#model-conversion_3","title":"Model conversion","text":"

Convert the ckpt model to an ONNX model. This step and the Test step can only be run on the CPU.

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format ONNX --device_target [CPU]\ne.g.\n#Run on CPU\npython ./deploy/export.py --config ./configs/yolov3/yolov3.yaml --weight yolov3-darknet53_300e_mAP455-adfb27af.ckpt --per_batch_size 1 --file_format ONNX --device_target CPU\n
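After export, the resulting file can be checked with the onnx package installed above before running inference. The file name below is an assumption matching the Predict example later in this section:

import onnx

# Load the exported model and run ONNX's structural checker
model = onnx.load("yolov3.onnx")
onnx.checker.check_model(model)
print([i.name for i in model.graph.input], "->", [o.name for o in model.graph.output])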

"},{"location":"tutorials/deployment/#onnx-test","title":"ONNX Test","text":"

Coming soon

"},{"location":"tutorials/deployment/#onnxruntime-predict","title":"ONNXRuntime Predict","text":"

Infer a single image:

python ./deploy/predict.py --model_type ONNX --model_path ./path_to_onnx_model/model.onnx --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython ./deploy/predict.py --model_type ONNX --model_path ./yolov3.onnx --config ./configs/yolov3/yolov3.yaml --image_path ./coco/image/val2017/image.jpg\n
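deploy/predict.py wraps the pre- and post-processing; for a quick standalone check that the exported model runs under onnxruntime, something like the following minimal sketch can be used. The input shape is an assumption for per_batch_size 1 and 640x640 images, and real detections additionally require the letterbox pre-processing and NMS performed by predict.py:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("./yolov3.onnx", providers=["CPUExecutionProvider"])
inp = session.get_inputs()[0]
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # assumed export shape
outputs = session.run(None, {inp.name: dummy})
print(inp.name, inp.shape, "->", [o.shape for o in outputs])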

"},{"location":"tutorials/deployment/#standard-and-supported-model-libraries","title":"Standard and supported model libraries","text":"
  • YOLOv8
  • YOLOv7
  • YOLOX
  • YOLOv5
  • YOLOv4
  • YOLOv3
| Name | Scale | Context | ImageSize | Dataset | Box mAP (%) | Params | FLOPs | Recipe | Download |
|------|-------|---------|-----------|---------|-------------|--------|-------|--------|----------|
| YOLOv8 | N | D310x1-G | 640 | MS COCO 2017 | 37.2 | 3.2M | 8.7G | yaml | ckpt mindir |
| YOLOv8 | S | D310x1-G | 640 | MS COCO 2017 | 44.6 | 11.2M | 28.6G | yaml | ckpt mindir |
| YOLOv8 | M | D310x1-G | 640 | MS COCO 2017 | 50.5 | 25.9M | 78.9G | yaml | ckpt mindir |
| YOLOv8 | L | D310x1-G | 640 | MS COCO 2017 | 52.8 | 43.7M | 165.2G | yaml | ckpt mindir |
| YOLOv8 | X | D310x1-G | 640 | MS COCO 2017 | 53.7 | 68.2M | 257.8G | yaml | ckpt mindir |
| YOLOv7 | Tiny | D310x1-G | 640 | MS COCO 2017 | 37.5 | 6.2M | 13.8G | yaml | ckpt mindir |
| YOLOv7 | L | D310x1-G | 640 | MS COCO 2017 | 50.8 | 36.9M | 104.7G | yaml | ckpt mindir |
| YOLOv7 | X | D310x1-G | 640 | MS COCO 2017 | 52.4 | 71.3M | 189.9G | yaml | ckpt mindir |
| YOLOv5 | N | D310x1-G | 640 | MS COCO 2017 | 27.3 | 1.9M | 4.5G | yaml | ckpt mindir |
| YOLOv5 | S | D310x1-G | 640 | MS COCO 2017 | 37.6 | 7.2M | 16.5G | yaml | ckpt mindir |
| YOLOv5 | M | D310x1-G | 640 | MS COCO 2017 | 44.9 | 21.2M | 49.0G | yaml | ckpt mindir |
| YOLOv5 | L | D310x1-G | 640 | MS COCO 2017 | 48.5 | 46.5M | 109.1G | yaml | ckpt mindir |
| YOLOv5 | X | D310x1-G | 640 | MS COCO 2017 | 50.5 | 86.7M | 205.7G | yaml | ckpt mindir |
| YOLOv4 | CSPDarknet53 | D310x1-G | 608 | MS COCO 2017 | 45.4 | 27.6M | 52G | yaml | ckpt mindir |
| YOLOv4 | CSPDarknet53(silu) | D310x1-G | 640 | MS COCO 2017 | 45.8 | 27.6M | 52G | yaml | ckpt mindir |
| YOLOv3 | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 45.5 | 61.9M | 156.4G | yaml | ckpt mindir |
| YOLOX | N | D310x1-G | 416 | MS COCO 2017 | 24.1 | 0.9M | 1.1G | yaml | ckpt mindir |
| YOLOX | Tiny | D310x1-G | 416 | MS COCO 2017 | 33.3 | 5.1M | 6.5G | yaml | ckpt mindir |
| YOLOX | S | D310x1-G | 640 | MS COCO 2017 | 40.7 | 9.0M | 26.8G | yaml | ckpt mindir |
| YOLOX | M | D310x1-G | 640 | MS COCO 2017 | 46.7 | 25.3M | 73.8G | yaml | ckpt mindir |
| YOLOX | L | D310x1-G | 640 | MS COCO 2017 | 49.2 | 54.2M | 155.6G | yaml | ckpt mindir |
| YOLOX | X | D310x1-G | 640 | MS COCO 2017 | 51.6 | 99.1M | 281.9G | yaml | ckpt mindir |
| YOLOX | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 47.7 | 63.7M | 185.3G | yaml | ckpt mindir |
"},{"location":"tutorials/finetune/","title":"Fine-tuning","text":""},{"location":"tutorials/finetune/#custom-dataset-finetune-process","title":"Custom Dataset Finetune Process","text":"

This article takes the Safety Hat Wearing Detection (SHWD) dataset as an example to introduce the main process of fine-tuning MindYOLO on a custom dataset.

"},{"location":"tutorials/finetune/#dataset-conversion","title":"Dataset Conversion","text":"

The SHWD dataset uses annotations in VOC format, and its directory structure is as follows:

             Root directory\n                \u251c\u2500\u2500 Annotations\n                \u2502 \u251c\u2500\u2500 000000.xml\n                \u2502 \u2514\u2500\u2500 000002.xml\n                \u251c\u2500\u2500 ImageSets\n                \u2502 \u2514\u2500\u2500 Main\n                \u2502 \u251c\u2500\u2500 test.txt\n                \u2502 \u251c\u2500\u2500 train.txt\n                \u2502 \u251c\u2500\u2500 trainval.txt\n                \u2502 \u2514\u2500\u2500 val.txt\n                \u2514\u2500\u2500 JPEGImages\n                        \u251c\u2500\u2500 000000.jpg\n                        \u2514\u2500\u2500 000002.jpg\n
The xml file under the Annotations folder contains annotation information for each picture. The main contents are as follows:
<annotation>\n  <folder>JPEGImages</folder>\n  <filename>000377.jpg</filename>\n  <path>F:\\baidu\\VOC2028\\JPEGImages\\000377.jpg</path>\n  <source>\n    <database>Unknown</database>\n  </source>\n  <size>\n    <width>750</width>\n    <height>558</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>hat</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>142</xmin>\n      <ymin>388</ymin>\n      <xmax>177</xmax>\n      <ymax>426</ymax>\n    </bndbox>\n  </object>\n
An annotation can contain multiple objects. The name field in each object is the category name, and xmin, ymin, xmax, and ymax are the coordinates of the top-left and bottom-right corners of the bounding box.

The dataset format supported by MindYOLO is the YOLO format. For details, please refer to Data Preparation.

Since MindYOLO uses the image name as image_id during the validation phase, image names can only be numeric, not strings, so the images need to be renamed. Converting the SHWD dataset to YOLO format involves the following steps:
  • Copy the images to the corresponding path and rename them
  • Write the relative path of each image into the corresponding txt file in the root directory
  • Parse the xml files and generate the corresponding txt annotation files under the corresponding path
  • For the validation set, also generate the final json file
A minimal sketch of the xml-to-txt step is shown below.
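The core of the xml-to-txt step is converting each VOC bounding box into a normalized "class_id cx cy w h" line. The snippet below is only an illustrative sketch, not the actual convert_shwd2yolo.py; the class-to-id mapping is an assumption based on the SHWD categories:

import xml.etree.ElementTree as ET

CLASS_MAP = {"person": 0, "hat": 1}  # assumed category-to-id mapping

def voc_to_yolo_lines(xml_path):
    # Parse one VOC xml file and return YOLO-format label lines
    root = ET.parse(xml_path).getroot()
    w = float(root.find("size/width").text)
    h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        cls_id = CLASS_MAP[obj.find("name").text]
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        cx, cy = (xmin + xmax) / 2 / w, (ymin + ymax) / 2 / h
        bw, bh = (xmax - xmin) / w, (ymax - ymin) / h
        lines.append(f"{cls_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}")
    return lines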

For the full implementation, please refer to convert_shwd2yolo.py, which can be run as follows:

python examples/finetune_SHWD/convert_shwd2yolo.py --root_dir /path_to_shwd/SHWD\n
Running the above command generates an SHWD dataset in YOLO format in the same directory without modifying the original dataset.

"},{"location":"tutorials/finetune/#write-yaml-configuration-file","title":"Write yaml configuration file","text":"

The configuration file mainly contains parameters related to the dataset, data augmentation, loss, optimizer, and model structure. Since MindYOLO provides a yaml file inheritance mechanism, you only need to write the parameters that need to be adjusted into yolov7-tiny_shwd.yaml and inherit the native yaml file provided by MindYOLO. Its content is as follows:

__BASE__: [\n  '../../configs/yolov7/yolov7-tiny.yaml',\n]\n\nper_batch_size: 16 # Single card batchsize, total batchsize=per_batch_size * device_num\nimg_size: 640 # image sizes\nweight: ./yolov7-tiny_pretrain.ckpt\nstrict_load: False # Whether to strictly load the internal parameters of ckpt. The default is True. If set to False, when the number of classifications is inconsistent, the weight of the last layer of classifiers will be discarded.\nlog_interval: 10 #Print the loss result every log_interval iterations\n\ndata:\n  dataset_name: shwd\n  train_set: ./SHWD/train.txt # Actual training data path\n  val_set: ./SHWD/val.txt\n  test_set: ./SHWD/val.txt\n  nc: 2 # Number of categories\n  # class names\n  names: [ 'person', 'hat' ] # The name of each category\n\noptimizer:\n  lr_init: 0.001 # initial learning rate\n
  • __BASE__ is a list indicating the paths of the inherited yaml files; multiple yaml files can be inherited.
  • per_batch_size and img_size represent the batch size on a single card and the image size used during data processing, respectively.
  • weight is the file path of the pre-trained model mentioned above, and strict_load means discarding parameters with inconsistent shapes.
  • log_interval is the log printing interval.
  • All parameters under the data field are dataset-related: dataset_name is the name of the custom dataset; train_set, val_set, and test_set are the paths of the txt files listing the training, validation, and test images respectively; nc is the number of categories; names lists the category names.
  • lr_init under the optimizer field is the initial learning rate after warm-up, which is 10 times smaller than the default value.
A conceptual sketch of how the __BASE__ inheritance can be resolved is shown below.
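Conceptually, the inheritance mechanism loads every yaml listed in __BASE__ first and then overlays the child file on top, so parameters in the child file win. The sketch below illustrates the idea with a simple shallow merge; it assumes the __BASE__ paths are resolvable from the working directory, whereas MindYOLO's actual config loading handles these details (such as resolving paths relative to the yaml file and merging nested fields):

import yaml

def load_config(path):
    # Illustrative shallow-merge resolver for __BASE__-style inheritance
    with open(path) as f:
        cfg = yaml.safe_load(f)
    merged = {}
    for base_path in cfg.pop("__BASE__", []):
        merged.update(load_config(base_path))
    merged.update(cfg)  # child parameters override inherited ones
    return merged

# cfg = load_config("examples/finetune_SHWD/yolov7-tiny_shwd.yaml")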

For parameter inheritance relationship and parameter description, please refer to Configuration.

"},{"location":"tutorials/finetune/#download-pre-trained-model","title":"Download pre-trained model","text":"

You can choose a model from the model zoo provided by MindYOLO as the pre-trained model for the custom dataset. These pre-trained models already achieve good accuracy on the COCO dataset. Compared with training from scratch, loading a pre-trained model generally gives faster convergence and higher final accuracy, and largely avoids problems such as vanishing or exploding gradients caused by improper initialization.

The number of categories in a custom dataset is usually different from that of the COCO dataset. Since the detection head structure of each MindYOLO model depends on the number of categories, directly loading the pre-trained model may fail because of shape mismatches. By setting the strict_load parameter to False in the yaml configuration file, MindYOLO automatically discards parameters with mismatched shapes and raises a warning that those module parameters were not loaded. A rough sketch of what this amounts to is shown below.
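Roughly speaking, strict_load: False amounts to filtering the checkpoint before loading it into the network. The sketch below illustrates the idea and is not MindYOLO's exact code; the function name is illustrative:

import mindspore as ms

def load_pretrained_loosely(network, ckpt_path):
    # Drop checkpoint parameters whose name or shape does not match the network
    param_dict = ms.load_checkpoint(ckpt_path)
    model_params = {p.name: p for p in network.get_parameters()}
    filtered = {
        name: value
        for name, value in param_dict.items()
        if name in model_params and value.shape == model_params[name].shape
    }
    dropped = set(param_dict) - set(filtered)
    if dropped:
        print(f"Warning: {len(dropped)} checkpoint parameters were not loaded")
    ms.load_param_into_net(network, filtered)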

"},{"location":"tutorials/finetune/#model-fine-tuning-finetune","title":"Model fine-tuning (Finetune)","text":"

During model fine-tuning, you can first train with the default configuration. If the results are not good, consider adjusting the following parameters:
  • The learning rate can be lowered to prevent the loss from failing to converge.
  • per_batch_size can be adjusted according to the actual memory usage; generally, the larger per_batch_size is, the more accurate the gradient estimate.
  • The number of epochs can be adjusted according to whether the loss has converged.
  • Anchors can be adjusted according to the actual object sizes.

Since the SHWD training set only has about 6,000 images, the yolov7-tiny model was selected for training.
  • Distributed training on multi-card NPU/GPU, taking 8 cards as an example:

mpirun --allow-run-as-root -n 8 python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --is_parallel True\n
  • Train the model on a single card NPU/GPU/CPU:

python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml\n
Note: Training on the SHWD dataset directly with the default yolov7-tiny parameters achieves an AP50 of 87.0; changing the lr_init parameter from 0.01 to 0.001 achieves an AP50 of 89.2.

"},{"location":"tutorials/finetune/#visual-reasoning","title":"Visual reasoning","text":"

Use demo/predict.py to run visualized inference with the trained model, as follows:

python demo/predict.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg\n
The inference result is as follows:

"},{"location":"tutorials/modelarts/","title":"MindYOLO ModelArts Training Quick Start","text":"

This article mainly introduces how to train MindYOLO using the ModelArts platform. For ModelArts tutorials, refer to its Help Center.

"},{"location":"tutorials/modelarts/#prepare-data-and-code","title":"Prepare data and code","text":"

Use the OBS service to upload datasets. For the related operations, see the OBS User Guide. Obtain the AK of this account (https://docs.xckpjs.com/zh-cn/browsertg/obs/obs_03_1007.html); for the server address, please consult the corresponding platform administrator or the person in charge of the account. If the AK is not in the location specified in the user guide, please also consult the platform administrator or the person in charge of the account. Steps:

  1. Log in to obs browser+
  2. Create a bucket -> create a new folder (eg: coco)
  3. Upload the data files, placing them in a separate folder (coco in this example). The code will copy the data from the OBS bucket, and what gets copied is all the files inside this folder (e.g. coco). If you do not create a new folder, you cannot select the complete dataset.

"},{"location":"tutorials/modelarts/#prepare-code","title":"Prepare code","text":"

Also use the OBS service to upload the training code. Operation: create a bucket -> create a new folder (e.g. mindyolo) -> upload the code files; create an output folder at the same level as mindyolo to store training records, and create a log folder to store logs.

"},{"location":"tutorials/modelarts/#create-new-algorithm","title":"Create new algorithm","text":"
  1. Select Algorithm Management->Create in the tab.
  2. Customize the algorithm name, select Ascend-Powered-Engine as the prebuilt framework, select the MindSpore-2.0 image for the master branch and the MindSpore-1.8.1 image for the r0.1 branch, then set the code directory, startup file, input, output, and hyperparameters.
  • If you need to load pre-trained weights, you can select the uploaded model file in the model selection and add the ckpt_dir parameter in the running parameters.
  • The startup file is train.py
  • In the running hyperparameters, enable_modelarts needs to be added with the value True.
  • The config hyperparameter path refers to the directory shown in the running environment preview of the training job, such as /home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml
  • If distributed training scenarios are involved, the hyperparameter is_parallel needs to be added and set to True when running in distributed mode and False when running on a single card.
"},{"location":"tutorials/modelarts/#create-new-job","title":"Create new job","text":"
  1. Select in the ModelArts service: Training Management -> Training Jobs -> Create a training job, set the job name, and choose not to include it in the experiment; Create Method -> My Algorithm, select the newly created algorithm;
  2. Training input -> Data storage location, select the obs data bucket just created (coco in the example), select the output folder when preparing the code for training output, and set the config hyperparameter value according to the running environment preview;
  3. Select the resource pool, specifications, and compute nodes, and select the log folder created when preparing the code as the job log path.
  4. Submit training and it will be running after queuing.
"},{"location":"tutorials/modelarts/#modify-job","title":"Modify job","text":"

Select Rebuild on the training job page to modify the selected job configuration.

"},{"location":"tutorials/quick_start/","title":"Quick Start","text":""},{"location":"tutorials/quick_start/#getting-started-with-mindyolo","title":"Getting Started with MindYOLO","text":"

This document provides a brief introduction to the usage of built-in command-line tools in MindYOLO.

"},{"location":"tutorials/quick_start/#inference-demo-with-pre-trained-models","title":"Inference Demo with Pre-trained Models","text":"
  1. Pick a model and its config file from the Model Zoo, such as ./configs/yolov7/yolov7.yaml.
  2. Download the corresponding pre-trained checkpoint from the Model Zoo of each model.
  3. To run YOLO object detection with the built-in configs, please run:
# Run with Ascend (By default)\npython demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg\n\n# Run with GPU\npython demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=GPU\n

For details of the command line arguments, see demo/predict.py -h or look at its source code to understand their behavior. Some common notes:
  • To run on CPU, set device_target to CPU.
  • The results will be saved in ./detect_results

"},{"location":"tutorials/quick_start/#training-evaluation-in-command-line","title":"Training & Evaluation in Command Line","text":"
  • Prepare your dataset in YOLO format. If training with COCO (YOLO format), please prepare it from yolov5 or the darknet.
  coco/\n    {train,val}2017.txt\n    annotations/\n      instances_{train,val}2017.json\n    images/\n      {train,val}2017/\n          00000001.jpg\n          ...\n          # image files that are mentioned in the corresponding train/val2017.txt\n    labels/\n      {train,val}2017/\n          00000001.txt\n          ...\n          # label files that are mentioned in the corresponding train/val2017.txt\n
  • To train a model on 8 NPUs/GPUs:

    mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True\n

  • To train a model on 1 NPU/GPU/CPU:

    python train.py --config ./configs/yolov7/yolov7.yaml 

  • To evaluate a model's performance on 1 NPU/GPU/CPU:

    python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt\n

  • To evaluate a model's performance on 8 NPUs/GPUs:
    mpirun --allow-run-as-root -n 8 python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt --is_parallel True\n
    Notes:

(1) The default hyper-parameters are used for 8-card training; some parameters need to be adjusted for single-card training.

(2) The default device is Ascend, and you can modify it by specifying 'device_target' as Ascend/GPU/CPU, as these are currently supported.

(3) For more options, see train/test.py -h.

(4) To train on CloudBrain, see here

"},{"location":"tutorials/quick_start/#deployment","title":"Deployment","text":"

See here.

"},{"location":"tutorials/quick_start/#to-use-mindyolo-apis-in-your-code","title":"To use MindYOLO APIs in Your Code","text":"

Coming soon.

"},{"location":"zh/","title":"MindYOLO","text":"

MindYOLO\u57fa\u4e8emindspore\u5b9e\u73b0\u4e86\u6700\u65b0\u7684YOLO\u7cfb\u5217\u7b97\u6cd5\u3002\u4ee5\u4e0b\u662fmindyolo\u7684\u5206\u652f\u4e0emindspore\u7248\u672c\u7684\u5bf9\u5e94\u5173\u7cfb\uff1a

mindyolo mindspore master master 0.4 2.3.0 0.3 2.2.10 0.2 2.0 0.1 1.8

"},{"location":"zh/#_1","title":"\u6a21\u578b\u4ed3\u5e93\u548c\u57fa\u51c6","text":"

\u8be6\u89c1 \u6a21\u578b\u4ed3\u5e93\u8868\u683c

"},{"location":"zh/#_2","title":"\u652f\u6301\u6a21\u578b\u5217\u8868","text":"
  • YOLOv10 (\u6b22\u8fce\u5f00\u6e90\u8d21\u732e\u8005\u53c2\u4e0e\u5f00\u53d1)
  • YOLOv9 (\u6b22\u8fce\u5f00\u6e90\u8d21\u732e\u8005\u53c2\u4e0e\u5f00\u53d1)
  • YOLOv8
  • YOLOv7
  • YOLOX
  • YOLOv5
  • YOLOv4
  • YOLOv3
"},{"location":"zh/#_3","title":"\u5b89\u88c5","text":"

\u8be6\u89c1 \u5b89\u88c5

"},{"location":"zh/#_4","title":"\u5feb\u901f\u5f00\u59cb","text":"

\u8be6\u89c1 \u5feb\u901f\u5f00\u59cb

"},{"location":"zh/#_5","title":"\u8bf4\u660e","text":"

\u26a0\ufe0f \u5f53\u524d\u7248\u672c\u57fa\u4e8e \u56fe\u6a21\u5f0f\u9759\u6001shape\u5f00\u53d1\u3002 \u52a8\u6001shape\u5c06\u5728\u540e\u7eed\u652f\u6301\uff0c\u656c\u8bf7\u671f\u5f85\u3002

"},{"location":"zh/#_6","title":"\u53c2\u4e0e\u9879\u76ee","text":"

\u4e3a\u4e86\u8ba9mindyolo\u66f4\u52a0\u5b8c\u5584\u548c\u4e30\u5bcc\uff0c\u6211\u4eec\u6b22\u8fce\u5305\u62ecissue\u548cpr\u5728\u5185\u7684\u4efb\u4f55\u5f00\u6e90\u8d21\u732e\u3002

\u8bf7\u53c2\u8003 \u53c2\u4e0e\u9879\u76ee \u83b7\u53d6\u63d0\u4f9b\u5f00\u6e90\u8d21\u732e\u7684\u76f8\u5173\u6307\u5bfc\u3002

"},{"location":"zh/#_7","title":"\u8bb8\u53ef","text":"

MindYOLO\u57fa\u4e8e Apache License 2.0 \u53d1\u5e03\u3002

"},{"location":"zh/#_8","title":"\u987b\u77e5","text":"

MindYOLO \u662f\u4e00\u4e2a\u5f00\u6e90\u9879\u76ee\uff0c\u6211\u4eec\u6b22\u8fce\u4efb\u4f55\u8d21\u732e\u548c\u53cd\u9988\u3002\u6211\u4eec\u5e0c\u671b\u8be5mindyolo\u80fd\u591f\u901a\u8fc7\u63d0\u4f9b\u7075\u6d3b\u4e14\u6807\u51c6\u5316\u7684\u5de5\u5177\u5305\u6765\u652f\u6301\u4e0d\u65ad\u58ee\u5927\u7684\u7814\u7a76\u793e\u533a\uff0c\u91cd\u73b0\u73b0\u6709\u65b9\u6cd5\uff0c\u5e76\u5f00\u53d1\u81ea\u5df1\u7684\u65b0\u5b9e\u65f6\u5bf9\u8c61\u68c0\u6d4b\u65b9\u6cd5\u3002

"},{"location":"zh/#_9","title":"\u5f15\u7528","text":"

\u5982\u679c\u60a8\u53d1\u73b0\u8be5\u9879\u76ee\u5bf9\u60a8\u7684\u7814\u7a76\u6709\u7528\uff0c\u8bf7\u8003\u8651\u5f15\u7528\uff1a

@misc{MindSpore Object Detection YOLO 2023,\n    title={{MindSpore Object Detection YOLO}:MindSpore Object Detection YOLO Toolbox and Benchmark},\n    author={MindSpore YOLO Contributors},\n    howpublished = {\\url{https://github.com/mindspore-lab/mindyolo}},\n    year={2023}\n}\n
"},{"location":"zh/installation/","title":"\u5b89\u88c5","text":""},{"location":"zh/installation/#_2","title":"\u4f9d\u8d56","text":"
  • mindspore >= 2.3
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • openmpi 4.0.3 (\u5206\u5e03\u5f0f\u8bad\u7ec3\u6240\u9700)

\u5982\u9700\u5b89\u88c5python\u76f8\u5173\u5e93\u4f9d\u8d56\uff0c\u53ea\u9700\u8fd0\u884c\uff1a

pip install -r requirements.txt\n

\u5982\u9700\u5b89\u88c5MindSpore\uff0c\u4f60\u53ef\u4ee5\u901a\u8fc7\u9075\u5faa\u5b98\u65b9\u6307\u5f15\uff0c\u5728\u4e0d\u540c\u7684\u786c\u4ef6\u5e73\u53f0\u4e0a\u83b7\u5f97\u6700\u4f18\u7684\u5b89\u88c5\u4f53\u9a8c\u3002 \u4e3a\u4e86\u5728\u5206\u5e03\u5f0f\u6a21\u5f0f\u4e0b\u8fd0\u884c\uff0c\u60a8\u8fd8\u9700\u8981\u5b89\u88c5OpenMPI\u3002

\u26a0\ufe0f \u5f53\u524d\u7248\u672c\u4ec5\u652f\u6301Ascend\u5e73\u53f0\uff0cGPU\u4f1a\u5728\u540e\u7eed\u652f\u6301\uff0c\u656c\u8bf7\u671f\u5f85\u3002

"},{"location":"zh/installation/#pypi","title":"PyPI\u6e90\u5b89\u88c5","text":"

MindYOLO \u73b0\u5df2\u53d1\u5e03\u4e3a\u4e00\u4e2aPython\u5305\u5e76\u80fd\u591f\u901a\u8fc7pip\u8fdb\u884c\u5b89\u88c5\u3002\u6211\u4eec\u63a8\u8350\u60a8\u5728\u865a\u62df\u73af\u5883\u5b89\u88c5\u4f7f\u7528\u3002 \u6253\u5f00\u7ec8\u7aef\uff0c\u8f93\u5165\u4ee5\u4e0b\u6307\u4ee4\u6765\u5b89\u88c5 MindYOLO:

pip install mindyolo\n
"},{"location":"zh/installation/#_3","title":"\u6e90\u7801\u5b89\u88c5 (\u672a\u7ecf\u6d4b\u8bd5\u7248\u672c)","text":""},{"location":"zh/installation/#vsc","title":"\u901a\u8fc7VSC\u5b89\u88c5","text":"
pip install git+https://github.com/mindspore-lab/mindyolo.git\n
"},{"location":"zh/installation/#src","title":"\u901a\u8fc7\u672c\u5730src\u5b89\u88c5","text":"

\u7531\u4e8e\u672c\u9879\u76ee\u5904\u4e8e\u6d3b\u8dc3\u5f00\u53d1\u9636\u6bb5\uff0c\u5982\u679c\u60a8\u662f\u5f00\u53d1\u8005\u6216\u8005\u8d21\u732e\u8005\uff0c\u8bf7\u4f18\u5148\u9009\u62e9\u6b64\u5b89\u88c5\u65b9\u5f0f\u3002

MindYOLO \u53ef\u4ee5\u5728\u7531 GitHub \u514b\u9686\u4ed3\u5e93\u5230\u672c\u5730\u6587\u4ef6\u5939\u540e\u76f4\u63a5\u4f7f\u7528\u3002 \u8fd9\u5bf9\u4e8e\u60f3\u4f7f\u7528\u6700\u65b0\u7248\u672c\u7684\u5f00\u53d1\u8005\u5341\u5206\u65b9\u4fbf:

git clone https://github.com/mindspore-lab/mindyolo.git\n

\u5728\u514b\u9686\u5230\u672c\u5730\u4e4b\u540e\uff0c\u63a8\u8350\u60a8\u4f7f\u7528\"\u53ef\u7f16\u8f91\"\u6a21\u5f0f\u8fdb\u884c\u5b89\u88c5\uff0c\u8fd9\u6709\u52a9\u4e8e\u89e3\u51b3\u6f5c\u5728\u7684\u6a21\u5757\u5bfc\u5165\u95ee\u9898\u3002

cd mindyolo\npip install -e .\n

\u6211\u4eec\u63d0\u4f9b\u4e86\u4e00\u4e2a\u53ef\u9009\u7684 fast coco api \u63a5\u53e3\u7528\u4e8e\u63d0\u5347\u9a8c\u8bc1\u8fc7\u7a0b\u7684\u901f\u5ea6\u3002\u4ee3\u7801\u662f\u4ee5C++\u5f62\u5f0f\u63d0\u4f9b\u7684\uff0c\u53ef\u4ee5\u5c1d\u8bd5\u7528\u4ee5\u4e0b\u7684\u547d\u4ee4\u8fdb\u884c\u5b89\u88c5 (\u6b64\u64cd\u4f5c\u662f\u53ef\u9009\u7684) :

cd mindyolo/csrc\nsh build.sh\n

\u6211\u4eec\u8fd8\u63d0\u4f9b\u4e86\u57fa\u4e8eMindSpore Custom\u81ea\u5b9a\u4e49\u7b97\u5b50 \u7684GPU\u878d\u5408\u7b97\u5b50\uff0c\u7528\u4e8e\u63d0\u5347\u8bad\u7ec3\u8fc7\u7a0b\u7684\u901f\u5ea6\u3002\u4ee3\u7801\u91c7\u7528C++\u548cCUDA\u5f00\u53d1\uff0c\u4f4d\u4e8eexamples/custom_gpu_op/\u8def\u5f84\u4e0b\u3002\u60a8\u53ef\u53c2\u8003\u793a\u4f8b\u811a\u672cexamples/custom_gpu_op/iou_loss_fused.py\uff0c\u4fee\u6539mindyolo/models/losses/iou_loss.py\u7684bbox_iou\u65b9\u6cd5\uff0c\u5728GPU\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4f7f\u7528\u8be5\u7279\u6027\u3002\u8fd0\u884ciou_loss_fused.py\u524d\uff0c\u9700\u8981\u4f7f\u7528\u4ee5\u4e0b\u7684\u547d\u4ee4\uff0c\u7f16\u8bd1\u751f\u6210GPU\u878d\u5408\u7b97\u5b50\u8fd0\u884c\u6240\u4f9d\u8d56\u7684\u52a8\u6001\u5e93 (\u6b64\u64cd\u4f5c\u5e76\u975e\u5fc5\u9700) :

bash examples/custom_gpu_op/fused_op/build.sh\n
"},{"location":"zh/how_to_guides/callback/","title":"MindYOLO\u56de\u8c03\u51fd\u6570\u7528\u6cd5","text":"

\u56de\u8c03\u51fd\u6570\uff1a\u5f53\u7a0b\u5e8f\u8fd0\u884c\u5230\u67d0\u4e2a\u6302\u8f7d\u70b9\u65f6\uff0c\u4f1a\u81ea\u52a8\u8c03\u7528\u5728\u8fd0\u884c\u65f6\u6ce8\u518c\u5230\u8be5\u6302\u8f7d\u70b9\u7684\u6240\u6709\u65b9\u6cd5\u3002 \u901a\u8fc7\u56de\u8c03\u51fd\u6570\u7684\u5f62\u5f0f\u53ef\u4ee5\u589e\u52a0\u7a0b\u5e8f\u7684\u7075\u6d3b\u6027\u548c\u6269\u5c55\u6027\uff0c\u56e0\u4e3a\u7528\u6237\u53ef\u4ee5\u5c06\u81ea\u5b9a\u4e49\u65b9\u6cd5\u6ce8\u518c\u5230\u8981\u8c03\u7528\u7684\u6302\u8f7d\u70b9\uff0c\u800c\u65e0\u9700\u4fee\u6539\u7a0b\u5e8f\u4e2d\u7684\u4ee3\u7801\u3002

\u5728MindYOLO\u4e2d\uff0c\u56de\u8c03\u51fd\u6570\u5177\u4f53\u5b9e\u73b0\u5728mindyolo/utils/callback.py\u6587\u4ef6\u4e2d\u3002

#mindyolo/utils/callback.py\n@CALLBACK_REGISTRY.registry_module()\nclass callback_class_name(BaseCallback):\n\n    def __init__(self, **kwargs):\n        super().__init__()\n        ...\n    def callback_fn_name(self, run_context: RunContext):\n        pass\n

\u901a\u8fc7\u6a21\u578b\u7684yaml\u6587\u4ef6callback\u5b57\u6bb5\u4e0b\u6dfb\u52a0\u4e00\u4e2a\u5b57\u5178\u5217\u8868\u6765\u5b9e\u73b0\u8c03\u7528

#\u56de\u8c03\u51fd\u6570\u914d\u7f6e\u5b57\u5178\uff1a\ncallback:\n- { name: callback_class_name, args: xx }\n- { name: callback_class_name2, args: xx }\n
\u4f8b\u5982\u4ee5YOLOX\u4e3a\u793a\u4f8b\uff1a

\u5728mindyolo/utils/callback.py\u6587\u4ef6YoloxSwitchTrain\u7c7b\u4e2don_train_step_begin\u65b9\u6cd5\u91cc\u9762\u6dfb\u52a0\u903b\u8f91\uff0c\u6253\u5370\u201ctrain step begin\u201d\u7684\u65e5\u5fd7

@CALLBACK_REGISTRY.registry_module()\nclass YoloxSwitchTrain(BaseCallback):\n\n    def on_train_step_begin(self, run_context: RunContext):\n         # \u81ea\u5b9a\u4e49\u903b\u8f91\n        logger.info(\"train step begin\")\n        pass\n
YOLOX\u5bf9\u5e94\u7684yaml\u6587\u4ef6configs/yolox/hyp.scratch.yaml\u7684callback\u5b57\u6bb5\u4e0b\u6dfb\u52a0\u8be5\u56de\u8c03\u51fd\u6570
callback:\n- { name: YoloxSwitchTrain, switch_epoch_num: 285 }\n
\u5219\u6bcf\u4e2a\u8bad\u7ec3step\u6267\u884c\u524d\u90fd\u4f1a\u6267\u884clogger.info(\"train step begin\")\u8bed\u53e5\u3002

\u501f\u52a9\u56de\u8c03\u51fd\u6570\uff0c\u7528\u6237\u53ef\u4ee5\u81ea\u5b9a\u4e49\u67d0\u4e2a\u6302\u8f7d\u70b9\u9700\u8981\u6267\u884c\u7684\u903b\u8f91\uff0c\u800c\u65e0\u9700\u7406\u89e3\u5b8c\u6574\u7684\u8bad\u7ec3\u6d41\u7a0b\u7684\u4ee3\u7801\u3002

"},{"location":"zh/how_to_guides/data_preparation/","title":"\u6570\u636e\u51c6\u5907","text":""},{"location":"zh/how_to_guides/data_preparation/#_2","title":"\u6570\u636e\u96c6\u683c\u5f0f\u4ecb\u7ecd","text":"

\u4e0b\u8f7dcoco2017 YOLO\u683c\u5f0f coco2017labels-segments \u4ee5\u53cacoco2017 \u539f\u59cb\u56fe\u7247 train2017 , val2017 \uff0c\u7136\u540e\u5c06coco2017 \u539f\u59cb\u56fe\u7247\u653e\u5230coco2017 YOLO\u683c\u5f0f images\u76ee\u5f55\u4e0b\uff1a

\u2514\u2500 coco2017_yolo\n    \u251c\u2500 annotations\n        \u2514\u2500 instances_val2017.json\n    \u251c\u2500 images\n        \u251c\u2500 train2017   # coco2017 \u539f\u59cb\u56fe\u7247\n        \u2514\u2500 val2017     # coco2017 \u539f\u59cb\u56fe\u7247\n    \u251c\u2500 labels\n        \u251c\u2500 train2017\n        \u2514\u2500 val2017\n    \u251c\u2500 train2017.txt\n    \u251c\u2500 val2017.txt\n    \u2514\u2500 test-dev2017.txt\n
\u5176\u4e2dtrain.txt\u6587\u4ef6\u6bcf\u884c\u5bf9\u5e94\u5355\u5f20\u56fe\u7247\u7684\u76f8\u5bf9\u8def\u5f84\uff0c\u4f8b\u5982\uff1a
./images/train2017/00000000.jpg\n./images/train2017/00000001.jpg\n./images/train2017/00000002.jpg\n./images/train2017/00000003.jpg\n./images/train2017/00000004.jpg\n./images/train2017/00000005.jpg\n
labels\u4e0b\u7684train2017\u6587\u4ef6\u5939\u4e0b\u7684txt\u6587\u4ef6\u4e3a\u76f8\u5e94\u56fe\u7247\u7684\u6807\u6ce8\u4fe1\u606f\uff0c\u652f\u6301detect\u548csegment\u4e24\u79cd\u683c\u5f0f\u3002

detect\u683c\u5f0f\uff1a\u901a\u5e38\u6bcf\u884c\u67095\u5217\uff0c\u5206\u522b\u5bf9\u5e94\u7c7b\u522bid\u4ee5\u53ca\u6807\u6ce8\u6846\u5f52\u4e00\u5316\u4e4b\u540e\u7684\u4e2d\u5fc3\u70b9\u5750\u6807xy\u548c\u5bbd\u9ad8wh

62 0.417040 0.206280 0.403600 0.412560\n62 0.818810 0.197933 0.174740 0.189680\n39 0.684540 0.277773 0.086240 0.358960\n0 0.620220 0.725853 0.751680 0.525840\n63 0.197190 0.364053 0.394380 0.669653\n39 0.932330 0.226240 0.034820 0.076640\n
segment\u683c\u5f0f\uff1a\u6bcf\u884c\u7b2c\u4e00\u4e2a\u6570\u636e\u4e3a\u7c7b\u522bid\uff0c\u540e\u7eed\u4e3a\u4e24\u4e24\u6210\u5bf9\u7684\u5f52\u4e00\u5316\u5750\u6807\u70b9x,y

45 0.782016 0.986521 0.937078 0.874167 0.957297 0.782021 0.950562 0.739333 0.825844 0.561792 0.714609 0.420229 0.657297 0.391021 0.608422 0.4 0.0303438 0.750562 0.0016875 0.811229 0.003375 0.889896 0.0320156 0.986521\n45 0.557859 0.143813 0.487078 0.0314583 0.859547 0.00897917 0.985953 0.130333 0.984266 0.184271 0.930344 0.386521 0.80225 0.480896 0.763484 0.485396 0.684266 0.39775 0.670781 0.3955 0.679219 0.310104 0.642141 0.253937 0.561234 0.155063 0.559547 0.137083\n50 0.39 0.727063 0.418234 0.649417 0.455297 0.614125 0.476469 0.614125 0.51 0.590583 0.54 0.569417 0.575297 0.562354 0.601766 0.56 0.607062 0.536479 0.614125 0.522354 0.637063 0.501167 0.665297 0.48 0.69 0.477646 0.698828 0.494125 0.698828 0.534125 0.712938 0.529417 0.742938 0.548229 0.760594 0.564708 0.774703 0.550583 0.778234 0.536479 0.781766 0.531771 0.792359 0.541167 0.802937 0.555292 0.802937 0.569417 0.802937 0.576479 0.822359 0.576479 0.822359 0.597646 0.811766 0.607062 0.811766 0.618833 0.818828 0.637646 0.820594 0.656479 0.827641 0.687063 0.827641 0.703521 0.829406 0.727063 0.838234 0.708229 0.852359 0.729417 0.868234 0.750583 0.871766 0.792938 0.877063 0.821167 0.884125 0.861167 0.817062 0.92 0.734125 0.976479 0.711172 0.988229 0.48 0.988229 0.494125 0.967063 0.517062 0.912937 0.508234 0.832937 0.485297 0.788229 0.471172 0.774125 0.395297 0.729417\n45 0.375219 0.0678333 0.375219 0.0590833 0.386828 0.0503542 0.424156 0.0315208 0.440797 0.0281458 0.464 0.0389167 0.525531 0.115583 0.611797 0.222521 0.676359 0.306583 0.678875 0.317354 0.677359 0.385271 0.66475 0.394687 0.588594 0.407458 0.417094 0.517771 0.280906 0.604521 0.0806562 0.722208 0.0256719 0.763917 0.00296875 0.809646 0 0.786104 0 0.745083 0 0.612583 0.03525 0.613271 0.0877187 0.626708 0.130594 0.626708 0.170437 0.6025 0.273844 0.548708 0.338906 0.507 0.509906 0.4115 0.604734 0.359042 0.596156 0.338188 0.595141 0.306583 0.595141 0.291792 0.579516 0.213104 0.516969 0.129042 0.498297 0.100792 0.466516 0.0987708 0.448875 0.0786042 0.405484 0.0705208 0.375219 0.0678333 0.28675 0.108375 0.282719 0.123167 0.267078 0.162854 0.266062 0.189083 0.245391 0.199833 0.203516 0.251625 0.187375 0.269771 0.159641 0.240188 0.101125 0.249604 0 0.287271 0 0.250271 0 0.245563 0.0975938 0.202521 0.203516 0.145354 0.251953 0.123167 0.28675 0.108375\n49 0.587812 0.128229 0.612281 0.0965625 0.663391 0.0840833 0.690031 0.0908125 0.700109 0.10425 0.705859 0.133042 0.700109 0.143604 0.686422 0.146479 0.664828 0.153188 0.644672 0.157042 0.629563 0.175271 0.605797 0.181021 0.595 0.147437\n49 0.7405 0.178417 0.733719 0.173896 0.727781 0.162583 0.729484 0.150167 0.738812 0.124146 0.747281 0.0981458 0.776109 0.0811875 0.804094 0.0845833 0.814266 0.102667 0.818516 0.115104 0.812578 0.133208 0.782906 0.151292 0.754063 0.172771\n49 0.602656 0.178854 0.636125 0.167875 0.655172 0.165125 0.6665 0.162375 0.680391 0.155521 0.691719 0.153458 0.703047 0.154146 0.713859 0.162375 0.724156 0.174729 0.730844 0.193271 0.733422 0.217979 0.733938 0.244063 0.733422 0.281813 0.732391 0.295542 0.728266 0.300354 0.702016 0.294854 0.682969 0.28525 0.672156 0.270146\n49 0.716891 0.0519583 0.683766 0.0103958 0.611688 0.0051875 0.568828 0.116875 0.590266 0.15325 0.590266 0.116875 0.613641 0.0857083 0.631172 0.0857083 0.6565 0.083125 0.679875 0.0883125 0.691563 0.0961042 0.711031 0.0649375\n
instances_val2017.json\u4e3acoco\u683c\u5f0f\u7684\u9a8c\u8bc1\u96c6\u6807\u6ce8\uff0c\u53ef\u76f4\u63a5\u8c03\u7528coco api\u7528\u4e8emap\u7684\u8ba1\u7b97\u3002

\u8bad\u7ec3&\u63a8\u7406\u65f6\uff0c\u9700\u4fee\u6539configs/coco.yaml\u4e2d\u7684train_set,val_set,test_set\u4e3a\u771f\u5b9e\u6570\u636e\u8def\u5f84

\u4f7f\u7528MindYOLO\u5957\u4ef6\u5b8c\u6210\u81ea\u5b9a\u4e49\u6570\u636e\u96c6finetune\u7684\u5b9e\u9645\u6848\u4f8b\u53ef\u53c2\u8003 \u5fae\u8c03

"},{"location":"zh/how_to_guides/write_a_new_model/","title":"\u6a21\u578b\u7f16\u5199\u6307\u5357","text":"

\u672c\u6587\u6863\u63d0\u4f9bMindYOLO\u7f16\u5199\u81ea\u5b9a\u4e49\u6a21\u578b\u7684\u6559\u7a0b\u3002 \u5206\u4e3a\u4e09\u4e2a\u90e8\u5206\uff1a

  • \u6a21\u578b\u5b9a\u4e49\uff1a\u6211\u4eec\u53ef\u4ee5\u76f4\u63a5\u5b9a\u4e49\u4e00\u4e2a\u7f51\u7edc\uff0c\u4e5f\u53ef\u4ee5\u4f7f\u7528yaml\u6587\u4ef6\u65b9\u5f0f\u5b9a\u4e49\u4e00\u4e2a\u7f51\u7edc\u3002
  • \u6ce8\u518c\u6a21\u578b\uff1a\u53ef\u9009\uff0c\u6ce8\u518c\u4e4b\u540e\u53ef\u4ee5\u5728create_model\u63a5\u53e3\u4e2d\u4f7f\u7528\u6587\u4ef6\u540d\u521b\u5efa\u81ea\u5b9a\u4e49\u7684\u6a21\u578b
  • \u9a8c\u8bc1: \u9a8c\u8bc1\u6a21\u578b\u662f\u5426\u53ef\u8fd0\u884c
"},{"location":"zh/how_to_guides/write_a_new_model/#_2","title":"\u6a21\u578b\u5b9a\u4e49","text":""},{"location":"zh/how_to_guides/write_a_new_model/#1python","title":"1.\u76f4\u63a5\u4f7f\u7528python\u4ee3\u7801\u6765\u7f16\u5199\u7f51\u7edc","text":""},{"location":"zh/how_to_guides/write_a_new_model/#_3","title":"\u6a21\u5757\u5bfc\u5165","text":"

\u5bfc\u5165MindSpore\u6846\u67b6\u4e2d\u7684nn\u6a21\u5757\u548cops\u6a21\u5757\uff0c\u7528\u4e8e\u5b9a\u4e49\u795e\u7ecf\u7f51\u7edc\u7684\u7ec4\u4ef6\u548c\u64cd\u4f5c\u3002

import mindspore.nn as nn\nimport mindspore.ops.operations as ops\n

"},{"location":"zh/how_to_guides/write_a_new_model/#_4","title":"\u521b\u5efa\u6a21\u578b","text":"

\u5b9a\u4e49\u4e86\u4e00\u4e2a\u7ee7\u627f\u81eann.Cell\u7684\u6a21\u578b\u7c7bMyModel\u3002\u5728\u6784\u9020\u51fd\u6570__init__\u4e2d\uff0c\u5b9a\u4e49\u6a21\u578b\u7684\u5404\u4e2a\u7ec4\u4ef6\uff1a

class MyModel(nn.Cell):\n    def __init__(self):\n        super(MyModel, self).__init__()\n        #conv1\u662f\u4e00\u4e2a2D\u5377\u79ef\u5c42\uff0c\u8f93\u5165\u901a\u9053\u6570\u4e3a3\uff0c\u8f93\u51fa\u901a\u9053\u6570\u4e3a16\uff0c\u5377\u79ef\u6838\u5927\u5c0f\u4e3a3x3\uff0c\u6b65\u957f\u4e3a1\uff0c\u586b\u5145\u4e3a1\u3002\n        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)\n        #relu\u662f\u4e00\u4e2aReLU\u6fc0\u6d3b\u51fd\u6570\u64cd\u4f5c\u3002\n        self.relu = ops.ReLU()\n        #axpool\u662f\u4e00\u4e2a2D\u6700\u5927\u6c60\u5316\u5c42\uff0c\u6c60\u5316\u7a97\u53e3\u5927\u5c0f\u4e3a2x2\uff0c\u6b65\u957f\u4e3a2\u3002\n        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)\n        #conv2\u662f\u53e6\u4e00\u4e2a2D\u5377\u79ef\u5c42\uff0c\u8f93\u5165\u901a\u9053\u6570\u4e3a16\uff0c\u8f93\u51fa\u901a\u9053\u6570\u4e3a32\uff0c\u5377\u79ef\u6838\u5927\u5c0f\u4e3a3x3\uff0c\u6b65\u957f\u4e3a1\uff0c\u586b\u5145\u4e3a1\u3002\n        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)\n        #fc\u662f\u4e00\u4e2a\u5168\u8fde\u63a5\u5c42\uff0c\u8f93\u5165\u7279\u5f81\u7ef4\u5ea6\u4e3a32x8x8\uff0c\u8f93\u51fa\u7279\u5f81\u7ef4\u5ea6\u4e3a10\u3002\n        self.fc = nn.Dense(32 * 8 * 8, 10)\n\n    #\u5728construct\u65b9\u6cd5\u4e2d\uff0c\u5b9a\u4e49\u4e86\u6a21\u578b\u7684\u524d\u5411\u4f20\u64ad\u8fc7\u7a0b\u3002\u8f93\u5165x\u7ecf\u8fc7\u5377\u79ef\u3001\u6fc0\u6d3b\u51fd\u6570\u3001\u6c60\u5316\u7b49\u64cd\u4f5c\u540e\uff0c\u901a\u8fc7\u5c55\u5e73\u64cd\u4f5c\u5c06\u7279\u5f81\u5f20\u91cf\u53d8\u4e3a\u4e00\u7ef4\u5411\u91cf\uff0c\u7136\u540e\u901a\u8fc7\u5168\u8fde\u63a5\u5c42\u5f97\u5230\u6700\u7ec8\u7684\u8f93\u51fa\u7ed3\u679c\u3002    \n    def construct(self, x): \n        x = self.conv1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.conv2(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = x.view(x.shape[0], -1)\n        x = self.fc(x)\n        return x\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_5","title":"\u521b\u5efa\u6a21\u578b\u5b9e\u4f8b","text":"

\u901a\u8fc7\u5b9e\u4f8b\u5316MyModel\u7c7b\uff0c\u521b\u5efa\u4e00\u4e2a\u6a21\u578b\u5b9e\u4f8bmodel\uff0c\u540e\u7eed\u53ef\u4ee5\u4f7f\u7528\u8be5\u5b9e\u4f8b\u8fdb\u884c\u6a21\u578b\u7684\u8bad\u7ec3\u548c\u63a8\u7406\u3002

model = MyModel()\n

"},{"location":"zh/how_to_guides/write_a_new_model/#2yaml","title":"2.\u4f7f\u7528yaml\u6587\u4ef6\u7f16\u5199\u7f51\u7edc","text":"

\u901a\u5e38\u9700\u8981\u4ee5\u4e0b\u4e09\u4e2a\u6b65\u9aa4\uff1a

  • \u65b0\u5efa\u4e00\u4e2amymodel.yaml\u6587\u4ef6
  • \u65b0\u5efa\u5bf9\u5e94\u7684mymodel.py\u6587\u4ef6
  • \u5728mindyolo/models/init.py\u6587\u4ef6\u4e2d\u5f15\u5165\u8be5\u6a21\u578b

\u4ee5\u4e0b\u662f\u7f16\u5199mymodel.yaml\u6587\u4ef6\u7684\u8be6\u7ec6\u6307\u5bfc: \u4ee5\u7f16\u5199\u4e00\u4e2a\u7b80\u5355\u7f51\u7edc\u4e3a\u4f8b\uff1a \u4ee5yaml\u683c\u5f0f\u7f16\u5199\u5fc5\u8981\u53c2\u6570\uff0c\u540e\u7eed\u5728mymodel.py\u6587\u4ef6\u91cc\u9762\u53ef\u4ee5\u7528\u5230\u8fd9\u4e9b\u53c2\u6570\u3002 \u5176\u4e2dnetwork\u90e8\u5206\u4e3a\u6a21\u578b\u7f51\u7edc [[from, number, module, args], ...]\uff1a\u6bcf\u4e2a\u5143\u7d20\u4ee3\u8868\u4e00\u4e2a\u7f51\u7edc\u5c42\u7684\u914d\u7f6e\u3002

# __BASE__\u4e2d\u7684yaml\u8868\u793a\u7528\u4e8e\u7ee7\u627f\u7684\u57fa\u7840\u914d\u7f6e\u6587\u4ef6\uff0c\u91cd\u590d\u7684\u53c2\u6570\u4f1a\u88ab\u5f53\u524d\u6587\u4ef6\u8986\u76d6\uff1b\n__BASE__:\n- '../coco.yaml'\n- './hyp.scratch-high.yaml'\n\nper_batch_size: 32\nimg_size: 640\nsync_bn: False\n\nnetwork:\nmodel_name: mymodel\ndepth_multiple: 1.0  # model depth multiple\nwidth_multiple: 1.0  # layer channel multiple\nstride: [ 8, 16, 32 ]\n\n# \u9aa8\u5e72\u7f51\u7edc\u90e8\u5206\u7684\u914d\u7f6e\uff0c\u6bcf\u5c42\u7684\u5143\u7d20\u542b\u4e49\u4e3a\n# [from, number, module, args]\n# \u4ee5\u7b2c\u4e00\u5c42\u4e3a\u4f8b\uff0c[-1, 1, ConvNormAct, [32, 3, 1]], \u8868\u793a\u8f93\u5165\u6765\u81ea `-1`(\u4e0a\u4e00\u5c42) \uff0c\u91cd\u590d\u6b21\u6570\u4e3a 1\uff0c\u6a21\u5757\u540d\u4e3a ConvNormAct\uff0c\u6a21\u5757\u8f93\u5165\u53c2\u6570\u4e3a [32, 3, 1]\uff1b\nbackbone: [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0\n[-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2\n[-1, 1, Bottleneck, [64]],\n[-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4\n[-1, 2, Bottleneck, [128]],\n[-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8\n[-1, 8, Bottleneck, [256]],\n]\n\n#head\u90e8\u5206\u7684\u914d\u7f6e \nhead: [\n[ -1, 1, ConvNormAct, [ 512, 3, 2 ] ],  # 7-P4/16\n[ -1, 8, Bottleneck, [ 512 ] ],\n[ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ],  # 9-P5/32\n[ -1, 4, Bottleneck, [ 1024 ] ],  # 10\n]\n

\u7f16\u5199mymodel.py\u6587\u4ef6:

"},{"location":"zh/how_to_guides/write_a_new_model/#_6","title":"\u6a21\u5757\u5bfc\u5165","text":"

\u9700\u8981\u5bfc\u5165\u5957\u4ef6\u5185\u7684\u6a21\u5757\u3002 \u5982from .registry import register_model\u7b49\u7b49

import numpy as np\n\nimport mindspore as ms\nfrom mindspore import Tensor, nn\n\n\nfrom .initializer import initialize_defult #\u7528\u4e8e\u521d\u59cb\u5316\u6a21\u578b\u7684\u9ed8\u8ba4\u53c2\u6570\uff0c\u5305\u62ec\u6743\u91cd\u521d\u59cb\u5316\u65b9\u5f0f\u3001BN \u5c42\u53c2\u6570\u7b49\u3002\nfrom .model_factory import build_model_from_cfg #\u7528\u4e8e\u6839\u636e YAML \u914d\u7f6e\u6587\u4ef6\u4e2d\u7684\u53c2\u6570\u6784\u5efa\u76ee\u6807\u68c0\u6d4b\u6a21\u578b\uff0c\u5e76\u8fd4\u56de\u8be5\u6a21\u578b\u7684\u5b9e\u4f8b\u3002\nfrom .registry import register_model #\u7528\u4e8e\u5c06\u81ea\u5b9a\u4e49\u7684\u6a21\u578b\u6ce8\u518c\u5230 Mindyolo \u4e2d\uff0c\u4ee5\u4fbf\u5728 YAML \u914d\u7f6e\u6587\u4ef6\u4e2d\u4f7f\u7528\u3002\n\n#\u53ef\u89c1\u6027\u58f0\u660e\n__all__ = [\"MYmodel\", \"mymodel\"]\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_7","title":"\u521b\u5efa\u914d\u7f6e\u5b57\u5178","text":"

_cfg\u51fd\u6570\u662f\u4e00\u4e2a\u8f85\u52a9\u51fd\u6570\uff0c\u7528\u4e8e\u521b\u5efa\u914d\u7f6e\u5b57\u5178\u3002\u5b83\u63a5\u53d7\u4e00\u4e2aurl\u53c2\u6570\u548c\u5176\u4ed6\u5173\u952e\u5b57\u53c2\u6570\uff0c\u5e76\u8fd4\u56de\u4e00\u4e2a\u5305\u542burl\u548c\u5176\u4ed6\u53c2\u6570\u7684\u5b57\u5178\u3002 default_cfgs\u662f\u4e00\u4e2a\u5b57\u5178\uff0c\u7528\u4e8e\u5b58\u50a8\u9ed8\u8ba4\u914d\u7f6e\u3002\u5728\u8fd9\u91cc\uff0cmymodel\u4f5c\u4e3a\u952e\uff0c\u4f7f\u7528_cfg\u51fd\u6570\u521b\u5efa\u4e86\u4e00\u4e2a\u914d\u7f6e\u5b57\u5178\u3002

def _cfg(url=\"\", **kwargs):\n    return {\"url\": url, **kwargs}\n\ndefault_cfgs = {\"mymodel\": _cfg(url=\"\")}\n

"},{"location":"zh/how_to_guides/write_a_new_model/#_8","title":"\u521b\u5efa\u6a21\u578b","text":"

\u5728MindSpore\u4e2d\uff0c\u6a21\u578b\u7684\u7c7b\u7ee7\u627f\u4e8enn.Cell\uff0c\u4e00\u822c\u6765\u8bf4\u9700\u8981\u91cd\u8f7d\u4ee5\u4e0b\u4e24\u4e2a\u51fd\u6570\uff1a

  • \u5728__init__\u51fd\u6570\u4e2d\uff0c\u5e94\u5f53\u5b9a\u4e49\u6a21\u578b\u4e2d\u9700\u8981\u7528\u5230\u7684module\u5c42\u3002
  • \u5728construct\u51fd\u6570\u4e2d\u5b9a\u4e49\u6a21\u578b\u524d\u5411\u903b\u8f91\u3002
class MYmodel(nn.Cell):\n\n    def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False):\n        super(MYmodel, self).__init__()\n        self.cfg = cfg\n        self.stride = Tensor(np.array(cfg.stride), ms.int32)\n        self.stride_max = int(max(self.cfg.stride))\n        ch, nc = in_channels, num_classes\n\n        self.nc = nc  # override yaml value\n        self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn)\n        self.names = [str(i) for i in range(nc)]  # default names\n\n        initialize_defult()  # \u53ef\u9009\uff0c\u4f60\u53ef\u80fd\u9700\u8981initialize_defult\u65b9\u6cd5\u4ee5\u83b7\u5f97\u548cpytorch\u4e00\u6837\u7684conv2d\u3001dense\u5c42\u7684\u521d\u59cb\u5316\u65b9\u5f0f\uff1b\n\n    def construct(self, x):\n        return self.model(x)\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_9","title":"\u6ce8\u518c\u6a21\u578b\uff08\u53ef\u9009\uff09","text":"

\u5982\u679c\u9700\u8981\u4f7f\u7528mindyolo\u63a5\u53e3\u521d\u59cb\u5316\u81ea\u5b9a\u4e49\u7684\u6a21\u578b\uff0c\u90a3\u4e48\u9700\u8981\u5148\u5bf9\u6a21\u578b\u8fdb\u884c**\u6ce8\u518c**\u548c**\u5bfc\u5165**

\u6a21\u578b\u6ce8\u518c

@register_model #\u6ce8\u518c\u540e\u7684\u6a21\u578b\u53ef\u4ee5\u901a\u8fc7 create_model \u63a5\u53e3\u4ee5\u6a21\u578b\u540d\u7684\u65b9\u5f0f\u8fdb\u884c\u8bbf\u95ee\uff1b\ndef mymodel(cfg, in_channels=3, num_classes=None, **kwargs) -> MYmodel:\n\"\"\"Get GoogLeNet model.\n    Refer to the base class `models.GoogLeNet` for more details.\"\"\"\n    model = MYmodel(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return model\n
\u6a21\u578b\u5bfc\u5165

#\u5728mindyolo/models/_init_.py\u6587\u4ef6\u4e2d\u6dfb\u52a0\u4ee5\u4e0b\u4ee3\u7801\n\nfrom . import mymodel #mymodel.py\u6587\u4ef6\u901a\u5e38\u653e\u5728mindyolo/models/\u76ee\u5f55\u4e0b\n__all__.extend(mymodel.__all__)\nfrom .mymodel import *\n
"},{"location":"zh/how_to_guides/write_a_new_model/#main","title":"\u9a8c\u8bc1main","text":"

\u521d\u59cb\u7f16\u5199\u9636\u6bb5\u5e94\u5f53\u4fdd\u8bc1\u6a21\u578b\u662f\u53ef\u8fd0\u884c\u7684\u3002\u53ef\u901a\u8fc7\u4e0b\u8ff0\u4ee3\u7801\u5757\u8fdb\u884c\u57fa\u7840\u9a8c\u8bc1\uff1a \u9996\u5148\u5bfc\u5165\u6240\u9700\u7684\u6a21\u5757\u548c\u51fd\u6570\u3002\u7136\u540e\uff0c\u901a\u8fc7\u89e3\u6790\u914d\u7f6e\u5bf9\u8c61\u3002

if __name__ == \"__main__\":\n    from mindyolo.models.model_factory import create_model\n    from mindyolo.utils.config import parse_config\n\n    opt = parse_config()\n
\u521b\u5efa\u6a21\u578b\u5e76\u6307\u5b9a\u76f8\u5173\u53c2\u6570\uff0c\u6ce8\u610f\uff1a\u5982\u679c\u8981\u5728create_model\u4e2d\u4f7f\u7528\u6587\u4ef6\u540d\u521b\u5efa\u81ea\u5b9a\u4e49\u7684\u6a21\u578b\uff0c\u90a3\u4e48\u9700\u8981\u5148\u4f7f\u7528\u6ce8\u518c\u5668@register_model\u8fdb\u884c\u6ce8\u518c\uff0c\u8bf7\u53c2\u89c1\u4e0a\u6587 \u6ce8\u518c\u6a21\u578b\uff08\u53ef\u9009)\u90e8\u5206\u5185\u5bb9
    model = create_model(\n        model_name=\"mymodel\",\n        model_cfg=opt.net,\n        num_classes=opt.data.nc,\n        sync_bn=opt.sync_bn if hasattr(opt, \"sync_bn\") else False,\n    ) \n

\u5426\u5219\uff0c\u8bf7\u4f7f\u7528import\u7684\u65b9\u5f0f\u5f15\u5165\u6a21\u578b

    from mindyolo.models.mymodel import MYmodel\n    model = MYmodel(\n        model_name=\"mymodel\",\n        model_cfg=opt.net,\n        num_classes=opt.data.nc,\n        sync_bn=opt.sync_bn if hasattr(opt, \"sync_bn\") else False,\n    ) \n
\u6700\u540e\uff0c\u521b\u5efa\u4e00\u4e2a\u8f93\u5165\u5f20\u91cfx\u5e76\u5c06\u5176\u4f20\u9012\u7ed9\u6a21\u578b\u8fdb\u884c\u524d\u5411\u8ba1\u7b97\u3002
    x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)\n    out = model(x)\n    out = out[0] if isinstance(out, (list, tuple)) else out\n    print(f\"Output shape is {[o.shape for o in out]}\")\n

"},{"location":"zh/modelzoo/benchmark/","title":"\u6a21\u578b\u4ed3\u5e93","text":""},{"location":"zh/modelzoo/benchmark/#_2","title":"\u68c0\u6d4b\u4efb\u52a1","text":"performance tested on Ascend 910(8p) with graph mode Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.2 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.6 11.2M yaml weights YOLOv8 M 16 * 8 640 MS COCO 2017 50.5 25.9M yaml weights YOLOv8 L 16 * 8 640 MS COCO 2017 52.8 43.7M yaml weights YOLOv8 X 16 * 8 640 MS COCO 2017 53.7 68.2M yaml weights YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 6.2M yaml weights YOLOv7 L 16 * 8 640 MS COCO 2017 50.8 36.9M yaml weights YOLOv7 X 12 * 8 640 MS COCO 2017 52.4 71.3M yaml weights YOLOv5 N 32 * 8 640 MS COCO 2017 27.3 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 7.2M yaml weights YOLOv5 M 32 * 8 640 MS COCO 2017 44.9 21.2M yaml weights YOLOv5 L 32 * 8 640 MS COCO 2017 48.5 46.5M yaml weights YOLOv5 X 16 * 8 640 MS COCO 2017 50.5 86.7M yaml weights YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 45.4 27.6M yaml weights YOLOv4 CSPDarknet53(silu) 16 * 8 608 MS COCO 2017 45.8 27.6M yaml weights YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 45.5 61.9M yaml weights YOLOX N 8 * 8 416 MS COCO 2017 24.1 0.9M yaml weights YOLOX Tiny 8 * 8 416 MS COCO 2017 33.3 5.1M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 40.7 9.0M yaml weights YOLOX M 8 * 8 640 MS COCO 2017 46.7 25.3M yaml weights YOLOX L 8 * 8 640 MS COCO 2017 49.2 54.2M yaml weights YOLOX X 8 * 8 640 MS COCO 2017 51.6 99.1M yaml weights YOLOX Darknet53 8 * 8 640 MS COCO 2017 47.7 63.7M yaml weights \u8bbe\u5907 Ascend 910*(8p) \u6d4b\u8bd5\u7ed3\u679c Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.3 373.55 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.7 365.53 11.2M yaml weights YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 496.21 6.2M yaml weights YOLOv5 N 32 * 8 640 MS COCO 2017 27.4 736.08 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 787.34 7.2M yaml weights YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 46.1 337.25 27.6M yaml weights YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 46.6 396.60 61.9M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 41.0 242.15 9.0M yaml weights"},{"location":"zh/modelzoo/benchmark/#_3","title":"\u56fe\u50cf\u5206\u5272","text":"\u8bbe\u5907 Ascend 910(8p) \u6d4b\u8bd5\u7ed3\u679c Name Scale BatchSize ImageSize Dataset Box mAP (%) Mask mAP (%) Params Recipe Download YOLOv8-seg X 16 * 8 640 MS COCO 2017 52.5 42.9 71.8M yaml weights"},{"location":"zh/modelzoo/benchmark/#_4","title":"\u90e8\u7f72","text":"
  • \u8be6\u89c1 \u90e8\u7f72
"},{"location":"zh/modelzoo/benchmark/#_5","title":"\u8bf4\u660e","text":"
  • Box mAP\uff1a\u5728\u9a8c\u8bc1\u96c6\u4e0a\u8ba1\u7b97\u7684\u51c6\u786e\u5ea6\u3002
"},{"location":"zh/modelzoo/yolov3/#_1","title":"\u6458\u8981","text":"

\u6211\u4eec\u5bf9YOLO\u8fdb\u884c\u4e86\u4e00\u7cfb\u5217\u66f4\u65b0\uff01\u5b83\u5305\u542b\u4e00\u5806\u5c0f\u8bbe\u8ba1\uff0c\u53ef\u4ee5\u4f7f\u7cfb\u7edf\u7684\u6027\u80fd\u5f97\u5230\u66f4\u65b0\u3002\u6211\u4eec\u4e5f\u8bad\u7ec3\u4e86\u4e00\u4e2a\u65b0\u7684\u3001\u6bd4\u8f83\u5927\u7684\u795e\u7ecf\u7f51\u7edc\u3002\u867d\u7136\u6bd4\u4e0a\u4e00\u7248\u66f4\u5927\u4e00\u4e9b\uff0c\u4f46\u662f\u7cbe\u5ea6\u4e5f\u63d0\u9ad8\u4e86\u3002\u4e0d\u7528\u62c5\u5fc3\uff0c\u5b83\u7684\u901f\u5ea6\u4f9d\u7136\u5f88\u5feb\u3002YOLOv3\u5728320\u00d7320\u8f93\u5165\u56fe\u50cf\u4e0a\u8fd0\u884c\u65f6\u53ea\u970022ms\uff0c\u5e76\u80fd\u8fbe\u523028.2mAP\uff0c\u5176\u7cbe\u5ea6\u548cSSD\u76f8\u5f53\uff0c\u4f46\u901f\u5ea6\u8981\u5feb\u4e0a3\u500d\u3002\u4f7f\u7528\u4e4b\u524d0.5 IOU mAP\u7684\u68c0\u6d4b\u6307\u6807\uff0cYOLOv3\u7684\u6548\u679c\u662f\u76f8\u5f53\u4e0d\u9519\u3002YOLOv3\u4f7f\u7528Titan X GPU\uff0c\u5176\u8017\u65f651ms\u68c0\u6d4b\u7cbe\u5ea6\u8fbe\u523057.9 AP50\uff0c\u4e0eRetinaNet\u76f8\u6bd4\uff0c\u5176\u7cbe\u5ea6\u53ea\u670957.5 AP50\uff0c\u4f46\u5374\u8017\u65f6198ms\uff0c\u76f8\u540c\u6027\u80fd\u7684\u6761\u4ef6\u4e0bYOLOv3\u901f\u5ea6\u6bd4RetinaNet\u5feb3.8\u500d\u3002

"},{"location":"zh/modelzoo/yolov3/#_2","title":"\u7ed3\u679c","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 45.5 61.9M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv3 Darknet53 16 * 8 640 MS COCO 2017 46.6 396.60 61.9M yaml weights"},{"location":"zh/modelzoo/yolov3/#_3","title":"\u8bf4\u660e","text":"
  • Box mAP\uff1a\u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
  • \u6211\u4eec\u53c2\u8003\u4e86\u5e38\u7528\u7684\u7b2c\u4e09\u65b9 YOLOv3 \u7684\u5b9e\u73b0\u3002
"},{"location":"zh/modelzoo/yolov3/#_4","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolov3/#_5","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolov3/#-","title":"- \u9884\u8bad\u7ec3\u6a21\u578b","text":"

\u60a8\u53ef\u4ee5\u4ece \u6b64\u5904 \u83b7\u53d6\u9884\u8bad\u7ec3\u6a21\u578b\u3002

\u8981\u5c06\u5176\u8f6c\u6362\u4e3a mindyolo \u53ef\u52a0\u8f7d\u7684 ckpt \u6587\u4ef6\uff0c\u8bf7\u5c06\u5176\u653e\u5728\u6839\u76ee\u5f55\u4e2d\uff0c\u7136\u540e\u8fd0\u884c\u4ee5\u4e0b\u8bed\u53e5\uff1a

python mindyolo/utils/convert_weight_darknet53.py\n

"},{"location":"zh/modelzoo/yolov3/#-_1","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# \u5728\u591a\u53f0GPU/Ascend\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\nmpirun -n 8 python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

\u6ce8\u610f\uff1a \u7531\u4e8e\u5168\u5c40batch size\uff08batch_size x \u8bbe\u5907\u6570\uff09\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u8d85\u53c2\u6570\uff0c\u5efa\u8bae\u4fdd\u6301\u5168\u5c40batch size\u4e0d\u53d8\u8fdb\u884c\u590d\u5236\uff0c\u6216\u8005\u5c06\u5b66\u4e60\u7387\u7ebf\u6027\u8c03\u6574\u4e3a\u65b0\u7684\u5168\u5c40batch size\u3002
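下面用一小段 Python 计算演示"按全局 batch size 线性调整学习率"的含义（其中卡数与学习率数值均为假设，仅作示意，并非官方配方）：

# 按全局 batch size 线性缩放学习率的换算示意（数值为假设）
ref_global_batch = 16 * 8    # 参考配方：单卡 batch size 16，共 8 卡
new_global_batch = 16 * 4    # 假设只用 4 卡复现
ref_lr = 0.01                # 参考配方中的初始学习率
new_lr = ref_lr * new_global_batch / ref_global_batch
print(new_lr)                # 0.005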

"},{"location":"zh/modelzoo/yolov3/#-_2","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend\n
"},{"location":"zh/modelzoo/yolov3/#_6","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolov3/#_7","title":"\u90e8\u7f72","text":"

\u8be6\u89c1 \u90e8\u7f72\u3002

"},{"location":"zh/modelzoo/yolov3/#_8","title":"\u5f15\u7528","text":"

[1] Jocher Glenn. YOLOv3 release v9.1. https://github.com/ultralytics/yolov3/releases/tag/v9.1, 2021. [2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018.

"},{"location":"zh/modelzoo/yolov4/#_1","title":"\u6458\u8981","text":"

\u76ee\u524d\u6709\u5f88\u591a\u53ef\u4ee5\u63d0\u9ad8CNN\u51c6\u786e\u6027\u7684\u7b97\u6cd5\u3002\u8fd9\u4e9b\u7b97\u6cd5\u7684\u7ec4\u5408\u5728\u5e9e\u5927\u6570\u636e\u96c6\u4e0a\u8fdb\u884c\u6d4b\u8bd5\u3001\u5bf9\u5b9e\u9a8c\u7ed3\u679c\u8fdb\u884c\u7406\u8bba\u9a8c\u8bc1\u90fd\u662f\u975e\u5e38\u5fc5\u8981\u7684\u3002 \u6709\u4e9b\u7b97\u6cd5\u53ea\u5728\u7279\u5b9a\u7684\u6a21\u578b\u4e0a\u6709\u6548\u679c\uff0c\u5e76\u4e14\u53ea\u5bf9\u7279\u5b9a\u7684\u95ee\u9898\u6709\u6548\uff0c\u6216\u8005\u53ea\u5bf9\u5c0f\u89c4\u6a21\u7684\u6570\u636e\u96c6\u6709\u6548\uff1b \u7136\u800c\u6709\u4e9b\u7b97\u6cd5\uff0c\u6bd4\u5982batch-normalization\u548cresidual-connections\uff0c\u5bf9\u5927\u591a\u6570\u7684\u6a21\u578b\u3001\u4efb\u52a1\u548c\u6570\u636e\u96c6\u90fd\u9002\u7528\u3002 \u6211\u4eec\u8ba4\u4e3a\u8fd9\u6837\u901a\u7528\u7684\u7b97\u6cd5\u5305\u62ec\uff1aWeighted-Residual-Connections\uff08WRC), Cross-Stage-Partial-connections\uff08CSP\uff09, Cross mini-Batch Normalization\uff08CmBN\uff09, Self-adversarial-training\uff08SAT\uff09\u4ee5\u53caMish-activation\u3002 \u6211\u4eec\u4f7f\u7528\u4e86\u65b0\u7684\u7b97\u6cd5\uff1aWRC, CSP, CmBN, SAT, Mish activation, Mosaic data augmentation, CmBN, Dropblock regularization \u548cCIoU loss\u4ee5\u53ca\u5b83\u4eec\u7684\u7ec4\u5408\uff0c \u83b7\u5f97\u4e86\u6700\u4f18\u7684\u6548\u679c\uff1a\u5728MS COCO\u6570\u636e\u96c6\u4e0a\u7684AP\u503c\u4e3a43.5%(65.7% AP50)\uff0c\u5728Tesla V100\u4e0a\u7684\u5b9e\u65f6\u63a8\u7406\u901f\u5ea6\u4e3a65FPS\u3002

"},{"location":"zh/modelzoo/yolov4/#_2","title":"\u7ed3\u679c","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 45.4 27.6M yaml weights YOLOv4 CSPDarknet53(silu) 16 * 8 608 MS COCO 2017 45.8 27.6M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv4 CSPDarknet53 16 * 8 608 MS COCO 2017 46.1 337.25 27.6M yaml weights"},{"location":"zh/modelzoo/yolov4/#_3","title":"\u8bf4\u660e","text":"
  • Box mAP: \u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
"},{"location":"zh/modelzoo/yolov4/#_4","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolov4/#_5","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolov4/#-","title":"- \u9884\u8bad\u7ec3\u6a21\u578b","text":"

\u60a8\u53ef\u4ee5\u4ece \u6b64\u5904 \u83b7\u53d6\u9884\u8bad\u7ec3\u6a21\u578b\u3002

\u8981\u5c06\u5176\u8f6c\u6362\u4e3a mindyolo \u53ef\u52a0\u8f7d\u7684 ckpt \u6587\u4ef6\uff0c\u8bf7\u5c06\u5176\u653e\u5728\u6839\u76ee\u5f55\u4e2d\uff0c\u7136\u540e\u8fd0\u884c\u4ee5\u4e0b\u8bed\u53e5\uff1a

python mindyolo/utils/convert_weight_cspdarknet53.py\n

"},{"location":"zh/modelzoo/yolov4/#-_1","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# distributed training on multiple GPU/Ascend devices\nmpirun -n 8 python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

"},{"location":"zh/modelzoo/yolov4/#_6","title":"\u8bf4\u660e","text":"
  • \u7531\u4e8e\u5168\u5c40batch size\uff08batch_size x \u8bbe\u5907\u6570\uff09\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u8d85\u53c2\u6570\uff0c\u5efa\u8bae\u4fdd\u6301\u5168\u5c40batch size\u4e0d\u53d8\u8fdb\u884c\u590d\u5236\uff0c\u6216\u8005\u5c06\u5b66\u4e60\u7387\u7ebf\u6027\u8c03\u6574\u4e3a\u65b0\u7684\u5168\u5c40batch size\u3002
  • \u5982\u679c\u51fa\u73b0\u4ee5\u4e0b\u8b66\u544a\uff0c\u53ef\u4ee5\u901a\u8fc7\u8bbe\u7f6e\u73af\u5883\u53d8\u91cf PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' \u6765\u4fee\u590d\u3002
    multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache))\n
"},{"location":"zh/modelzoo/yolov4/#-_2","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320\n
"},{"location":"zh/modelzoo/yolov4/#_7","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolov4/#_8","title":"\u90e8\u7f72","text":"

详见 部署。

"},{"location":"zh/modelzoo/yolov4/#_9","title":"\u5f15\u7528","text":"

[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020.

"},{"location":"zh/modelzoo/yolov5/#_1","title":"\u6458\u8981","text":"

YOLOv5 \u662f\u5728 COCO \u6570\u636e\u96c6\u4e0a\u9884\u8bad\u7ec3\u7684\u4e00\u7cfb\u5217\u5bf9\u8c61\u68c0\u6d4b\u67b6\u6784\u548c\u6a21\u578b\uff0c\u4ee3\u8868\u4e86 Ultralytics \u5bf9\u672a\u6765\u89c6\u89c9 AI \u65b9\u6cd5\u7684\u5f00\u6e90\u7814\u7a76\uff0c\u878d\u5408\u4e86\u6570\u5343\u5c0f\u65f6\u7684\u7814\u7a76\u548c\u5f00\u53d1\u4e2d\u79ef\u7d2f\u7684\u7ecf\u9a8c\u6559\u8bad\u548c\u6700\u4f73\u5b9e\u8df5\u3002

"},{"location":"zh/modelzoo/yolov5/#_2","title":"\u7ed3\u679c","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv5 N 32 * 8 640 MS COCO 2017 27.3 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 7.2M yaml weights YOLOv5 M 32 * 8 640 MS COCO 2017 44.9 21.2M yaml weights YOLOv5 L 32 * 8 640 MS COCO 2017 48.5 46.5M yaml weights YOLOv5 X 16 * 8 640 MS COCO 2017 50.5 86.7M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv5 N 32 * 8 640 MS COCO 2017 27.4 736.08 1.9M yaml weights YOLOv5 S 32 * 8 640 MS COCO 2017 37.6 787.34 7.2M yaml weights"},{"location":"zh/modelzoo/yolov5/#_3","title":"\u8bf4\u660e","text":"
  • Box mAP\uff1a\u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
  • \u6211\u4eec\u53c2\u8003\u4e86\u5e38\u7528\u7684\u7b2c\u4e09\u65b9 YOLOV5 \u91cd\u73b0\u4e86P5\uff08\u5927\u76ee\u6807\uff09\u7cfb\u5217\u6a21\u578b\uff0c\u5e76\u505a\u51fa\u4e86\u5982\u4e0b\u6539\u52a8\uff1a\u4e0e\u5b98\u65b9\u4ee3\u7801\u6709\u6240\u4e0d\u540c\uff0c\u6211\u4eec\u4f7f\u7528\u4e868x NPU(Ascend910)\u8fdb\u884c\u8bad\u7ec3\uff0c\u5355NPU\u7684batch size\u4e3a32\u3002
"},{"location":"zh/modelzoo/yolov5/#_4","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolov5/#_5","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolov5/#-","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# \u5728\u591a\u53f0GPU/Ascend\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\nmpirun -n 8 python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

\u6ce8\u610f\uff1a \u7531\u4e8e\u5168\u5c40batch size\uff08batch_size x \u8bbe\u5907\u6570\uff09\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u8d85\u53c2\u6570\uff0c\u5efa\u8bae\u4fdd\u6301\u5168\u5c40batch size\u4e0d\u53d8\u8fdb\u884c\u590d\u5236\uff0c\u6216\u8005\u5c06\u5b66\u4e60\u7387\u7ebf\u6027\u8c03\u6574\u4e3a\u65b0\u7684\u5168\u5c40batch size\u3002

"},{"location":"zh/modelzoo/yolov5/#-_1","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend\n
"},{"location":"zh/modelzoo/yolov5/#_6","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolov5/#_7","title":"\u90e8\u7f72","text":"

\u8be6\u89c1 \u90e8\u7f72\u3002

"},{"location":"zh/modelzoo/yolov5/#_8","title":"\u5f15\u7528","text":"

[1] Jocher Glenn. YOLOv5 release v6.1. https://github.com/ultralytics/yolov5/releases/tag/v6.1, 2022.

"},{"location":"zh/modelzoo/yolov7/#_1","title":"\u6458\u8981","text":"

YOLOv7 在 5 FPS 到 160 FPS 范围内的速度和准确度都超过了所有已知的目标检测器，并且在 GPU V100 上 30 FPS 或更高的所有已知实时目标检测器中具有最高的准确度 56.8% AP。YOLOv7-E6 目标检测器（56 FPS V100，55.9% AP）比基于 transformer 的检测器 SWIN-L Cascade-Mask R-CNN（9.2 FPS A100，53.9% AP）的速度和准确度分别高出 509% 和 2%，比基于卷积的检测器 ConvNeXt-XL Cascade-Mask R-CNN（8.6 FPS A100，55.2% AP）速度提高 551%，准确率提高 0.7%；在速度和准确度上，YOLOv7 还优于 YOLOR、YOLOX、Scaled-YOLOv4、YOLOv5、DETR、Deformable DETR、DINO-5scale-R50、ViT-Adapter-B 以及许多其他目标检测器。 此外，我们只在 MS COCO 数据集上从头开始训练 YOLOv7，而不使用任何其他数据集或预训练权重。

"},{"location":"zh/modelzoo/yolov7/#_2","title":"\u7ed3\u679c","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 6.2M yaml weights YOLOv7 L 16 * 8 640 MS COCO 2017 50.8 36.9M yaml weights YOLOv7 X 12 * 8 640 MS COCO 2017 52.4 71.3M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv7 Tiny 16 * 8 640 MS COCO 2017 37.5 496.21 6.2M yaml weights"},{"location":"zh/modelzoo/yolov7/#_3","title":"\u8bf4\u660e","text":"
  • Context\uff1a\u8bad\u7ec3\u4e0a\u4e0b\u6587\uff0c\u8868\u793a\u4e3a{\u8bbe\u5907}x{\u8bbe\u5907\u6570}-{mindspore\u6a21\u5f0f}\uff0c\u5176\u4e2dmindspore\u6a21\u5f0f\u53ef\u4ee5\u662fG-\u56fe\u6a21\u5f0f\u6216F-pynative\u6a21\u5f0f\u3002\u4f8b\u5982\uff0cD910x8-G\u7528\u4e8e\u57288\u5757Ascend 910 NPU\u4e0a\u4f7f\u7528graph\u6a21\u5f0f\u8fdb\u884c\u8bad\u7ec3\u3002
  • Box mAP\uff1a\u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
  • \u6211\u4eec\u53c2\u8003\u4e86\u5e38\u7528\u7684\u7b2c\u4e09\u65b9 YOLOV7 \u91cd\u73b0\u4e86P5\uff08\u5927\u76ee\u6807\uff09\u7cfb\u5217\u6a21\u578b\uff0c\u5e76\u505a\u51fa\u4e86\u5982\u4e0b\u6539\u52a8\uff1a\u4e0e\u5b98\u65b9\u4ee3\u7801\u6709\u6240\u4e0d\u540c\uff0c\u6211\u4eec\u4f7f\u7528\u4e868x NPU(Ascend910)\u8fdb\u884c\u8bad\u7ec3\uff0ctiny/l/x\u5355NPU\u7684batch size\u5206\u522b\u4e3a16/16/12\u3002
"},{"location":"zh/modelzoo/yolov7/#_4","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolov7/#_5","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolov7/#-","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# \u5728\u591a\u53f0GPU/Ascend\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\nmpirun -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

\u6ce8\u610f\uff1a \u7531\u4e8e\u5168\u5c40batch size\uff08batch_size x \u8bbe\u5907\u6570\uff09\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u8d85\u53c2\u6570\uff0c\u5efa\u8bae\u4fdd\u6301\u5168\u5c40batch size\u4e0d\u53d8\u8fdb\u884c\u590d\u5236\uff0c\u6216\u8005\u5c06\u5b66\u4e60\u7387\u7ebf\u6027\u8c03\u6574\u4e3a\u65b0\u7684\u5168\u5c40batch size\u3002

"},{"location":"zh/modelzoo/yolov7/#-_1","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend\n
"},{"location":"zh/modelzoo/yolov7/#_6","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolov7/#_7","title":"\u90e8\u7f72","text":"

\u8be6\u89c1 \u90e8\u7f72\u3002

"},{"location":"zh/modelzoo/yolov7/#_8","title":"\u5f15\u7528","text":"

[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022.

"},{"location":"zh/modelzoo/yolov8/#_1","title":"\u6458\u8981","text":"

Ultralytics YOLOv8 \u7531 Ultralytics \u5f00\u53d1\uff0c\u662f\u4e00\u6b3e\u5c16\u7aef\u7684\u3001\u6700\u5148\u8fdb\u7684 (SOTA) \u6a21\u578b\uff0c\u5b83\u4ee5\u4e4b\u524d YOLO \u7248\u672c\u7684\u6210\u529f\u4e3a\u57fa\u7840\uff0c\u5e76\u5f15\u5165\u4e86\u65b0\u529f\u80fd\u548c\u6539\u8fdb\uff0c\u4ee5\u8fdb\u4e00\u6b65\u63d0\u9ad8\u6027\u80fd\u548c\u7075\u6d3b\u6027\u3002YOLOv8 \u65e8\u5728\u5feb\u901f\u3001\u51c6\u786e\u4e14\u6613\u4e8e\u4f7f\u7528\uff0c\u4f7f\u5176\u6210\u4e3a\u5404\u79cd\u7269\u4f53\u68c0\u6d4b\u3001\u56fe\u50cf\u5206\u5272\u548c\u56fe\u50cf\u5206\u7c7b\u4efb\u52a1\u7684\u7edd\u4f73\u9009\u62e9\u3002

"},{"location":"zh/modelzoo/yolov8/#_2","title":"\u7ed3\u679c","text":""},{"location":"zh/modelzoo/yolov8/#_3","title":"\u56fe\u50cf\u68c0\u6d4b","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.2 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.6 11.2M yaml weights YOLOv8 M 16 * 8 640 MS COCO 2017 50.5 25.9M yaml weights YOLOv8 L 16 * 8 640 MS COCO 2017 52.8 43.7M yaml weights YOLOv8 X 16 * 8 640 MS COCO 2017 53.7 68.2M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOv8 N 16 * 8 640 MS COCO 2017 37.3 373.55 3.2M yaml weights YOLOv8 S 16 * 8 640 MS COCO 2017 44.7 365.53 11.2M yaml weights"},{"location":"zh/modelzoo/yolov8/#_4","title":"\u56fe\u50cf\u5206\u5272","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Mask mAP (%) Params Recipe Download YOLOv8-seg X 16 * 8 640 MS COCO 2017 52.5 42.9 71.8M yaml weights"},{"location":"zh/modelzoo/yolov8/#_5","title":"\u8bf4\u660e","text":"
  • Box mAP\uff1a\u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
  • \u6211\u4eec\u53c2\u8003\u4e86\u5e38\u7528\u7684\u7b2c\u4e09\u65b9 YOLOV8 \u91cd\u73b0\u4e86P5\uff08\u5927\u76ee\u6807\uff09\u7cfb\u5217\u6a21\u578b\u3002
"},{"location":"zh/modelzoo/yolov8/#_6","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolov8/#_7","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolov8/#-","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# \u5728\u591a\u53f0GPU/Ascend\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\nmpirun -n 8 python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

\u6ce8\u610f\uff1a \u7531\u4e8e\u5168\u5c40batch size\uff08batch_size x \u8bbe\u5907\u6570\uff09\u662f\u4e00\u4e2a\u91cd\u8981\u7684\u8d85\u53c2\u6570\uff0c\u5efa\u8bae\u4fdd\u6301\u5168\u5c40batch size\u4e0d\u53d8\u8fdb\u884c\u590d\u5236\uff0c\u6216\u8005\u5c06\u5b66\u4e60\u7387\u7ebf\u6027\u8c03\u6574\u4e3a\u65b0\u7684\u5168\u5c40batch size\u3002

"},{"location":"zh/modelzoo/yolov8/#-_1","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend\n
"},{"location":"zh/modelzoo/yolov8/#_8","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolov8/#_9","title":"\u90e8\u7f72","text":"

\u8be6\u89c1 \u90e8\u7f72\u3002

"},{"location":"zh/modelzoo/yolov8/#_10","title":"\u5f15\u7528","text":"

[1] Jocher Glenn. Ultralytics YOLOv8. https://github.com/ultralytics/ultralytics, 2023.

"},{"location":"zh/modelzoo/yolox/#_1","title":"\u6458\u8981","text":"

YOLOX \u662f\u4e00\u6b3e\u65b0\u578b\u9ad8\u6027\u80fd\u68c0\u6d4b\u6a21\u578b\uff0c\u5728 YOLO \u7cfb\u5217\u7684\u57fa\u7840\u4e0a\u8fdb\u884c\u4e86\u4e00\u4e9b\u7ecf\u9a8c\u4e30\u5bcc\u7684\u6539\u8fdb\u3002\u6211\u4eec\u5c06 YOLO \u68c0\u6d4b\u5668\u6539\u4e3a\u65e0\u951a\u65b9\u5f0f\uff0c\u5e76\u91c7\u7528\u5176\u4ed6\u5148\u8fdb\u7684\u68c0\u6d4b\u6280\u672f\uff0c\u4f8b\u5982\u89e3\u8026\u5934\u548c\u9886\u5148\u7684\u6807\u7b7e\u5206\u914d\u7b56\u7565 SimOTA\uff0c\u4ee5\u5728\u5927\u89c4\u6a21\u6a21\u578b\u4e2d\u5b9e\u73b0\u6700\u4f73\u6548\u679c\uff1a\u5bf9\u4e8e\u53ea\u6709 0.91M \u53c2\u6570\u548c 1.08G FLOPs \u7684 YOLO-Nano\uff0c\u6211\u4eec\u5728 COCO \u4e0a\u83b7\u5f97\u4e86 25.3% \u7684 AP\uff0c\u6bd4 NanoDet \u9ad8\u51fa 1.8% AP\uff1b\u5bf9\u4e8e\u4e1a\u754c\u4f7f\u7528\u6700\u5e7f\u6cdb\u7684\u68c0\u6d4b\u5668\u4e4b\u4e00 YOLOv3\uff0c\u6211\u4eec\u5c06\u5176\u5728 COCO \u4e0a\u7684 AP \u63d0\u5347\u5230 47.3%\uff0c\u6bd4\u76ee\u524d\u7684\u6700\u4f73\u5b9e\u8df5\u9ad8\u51fa 3.0% AP\uff1b\u5bf9\u4e8e\u53c2\u6570\u91cf\u4e0e YOLOv4-CSP \u5927\u81f4\u76f8\u540c\u7684 YOLOX-L\uff0cYOLOv5-L \u5728 Tesla V100 \u4e0a\u4ee5 68.9 FPS \u7684\u901f\u5ea6\u5728 COCO \u4e0a\u5b9e\u73b0\u4e86 50.0% \u7684 AP\uff0c\u6bd4 YOLOv5-L \u9ad8\u51fa 1.8% \u7684 AP\u3002\u6b64\u5916\uff0c\u6211\u4eec\u4f7f\u7528\u5355\u4e2a YOLOX-L \u6a21\u578b\u5728\u6d41\u5f0f\u611f\u77e5\u6311\u6218\u8d5b\uff08CVPR 2021 \u81ea\u52a8\u9a7e\u9a76\u7814\u8ba8\u4f1a\uff09\u4e0a\u83b7\u5f97\u4e86\u7b2c\u4e00\u540d\u3002

"},{"location":"zh/modelzoo/yolox/#_2","title":"\u7ed3\u679c","text":"\u4f7f\u7528\u56fe\u6a21\u5f0f\u5728 Ascend 910(8p) \u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) Params Recipe Download YOLOX N 8 * 8 416 MS COCO 2017 24.1 0.9M yaml weights YOLOX Tiny 8 * 8 416 MS COCO 2017 33.3 5.1M yaml weights YOLOX S 8 * 8 640 MS COCO 2017 40.7 9.0M yaml weights YOLOX M 8 * 8 640 MS COCO 2017 46.7 25.3M yaml weights YOLOX L 8 * 8 640 MS COCO 2017 49.2 54.2M yaml weights YOLOX X 8 * 8 640 MS COCO 2017 51.6 99.1M yaml weights YOLOX Darknet53 8 * 8 640 MS COCO 2017 47.7 63.7M yaml weights \u5728Ascend 910*(8p)\u4e0a\u6d4b\u8bd5\u7684\u8868\u73b0 Name Scale BatchSize ImageSize Dataset Box mAP (%) ms/step Params Recipe Download YOLOX S 8 * 8 640 MS COCO 2017 41.0 242.15 9.0M yaml weights"},{"location":"zh/modelzoo/yolox/#_3","title":"\u8bf4\u660e","text":"
  • Box mAP: \u9a8c\u8bc1\u96c6\u4e0a\u6d4b\u8bd5\u51fa\u7684\u51c6\u786e\u5ea6\u3002
  • 我们参考了官方的 YOLOX 来重现结果。
"},{"location":"zh/modelzoo/yolox/#_4","title":"\u5feb\u901f\u5165\u95e8","text":"

\u8be6\u60c5\u8bf7\u53c2\u9605 MindYOLO \u4e2d\u7684 \u5feb\u901f\u5165\u95e8\u3002

"},{"location":"zh/modelzoo/yolox/#_5","title":"\u8bad\u7ec3","text":""},{"location":"zh/modelzoo/yolox/#-","title":"- \u5206\u5e03\u5f0f\u8bad\u7ec3","text":"

\u4f7f\u7528\u9884\u7f6e\u7684\u8bad\u7ec3\u914d\u65b9\u53ef\u4ee5\u8f7b\u677e\u91cd\u73b0\u62a5\u544a\u7684\u7ed3\u679c\u3002\u5982\u9700\u5728\u591a\u53f0Ascend 910\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c

# \u5728\u591a\u53f0GPU/Ascend\u8bbe\u5907\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\nmpirun -n 8 python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True\n

\u5982\u679c\u811a\u672c\u7531root\u7528\u6237\u6267\u884c\uff0c\u5219\u5fc5\u987b\u5728mpirun\u4e2d\u6dfb\u52a0--allow-run-as-root\u53c2\u6570\u3002

\u540c\u6837\u7684\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528\u4e0a\u8ff0mpirun\u547d\u4ee4\u5728\u591a\u53f0GPU\u8bbe\u5907\u4e0a\u8bad\u7ec3\u6a21\u578b\u3002

\u6709\u5173\u6240\u6709\u8d85\u53c2\u6570\u7684\u8be6\u7ec6\u8bf4\u660e\uff0c\u8bf7\u53c2\u9605config.py\u3002

注意： 由于全局batch size（batch_size x 设备数）是一个重要的超参数，建议保持全局batch size不变进行复制，或者将学习率线性调整为新的全局batch size。

"},{"location":"zh/modelzoo/yolox/#-_1","title":"- \u5355\u5361\u8bad\u7ec3","text":"

\u5982\u679c\u60a8\u60f3\u5728\u8f83\u5c0f\u7684\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6216\u5fae\u8c03\u6a21\u578b\u800c\u4e0d\u8fdb\u884c\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u8bf7\u8fd0\u884c\uff1a

# \u5728 CPU/GPU/Ascend \u8bbe\u5907\u4e0a\u8fdb\u884c\u5355\u5361\u8bad\u7ec3\npython train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend\n
"},{"location":"zh/modelzoo/yolox/#_6","title":"\u9a8c\u8bc1\u548c\u6d4b\u8bd5","text":"

\u8981\u9a8c\u8bc1\u8bad\u7ec3\u6a21\u578b\u7684\u51c6\u786e\u6027\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528 test.py \u5e76\u4f7f\u7528 --weight \u4f20\u5165\u6743\u91cd\u8def\u5f84\u3002

python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt\n
"},{"location":"zh/modelzoo/yolox/#_7","title":"\u90e8\u7f72","text":"

\u8be6\u89c1 \u90e8\u7f72\u3002

"},{"location":"zh/modelzoo/yolox/#_8","title":"\u5f15\u7528","text":"

[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. https://arxiv.org/abs/2107.08430, 2021.

"},{"location":"zh/notes/changelog/","title":"\u66f4\u65b0\u65e5\u5fd7","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/notes/code_of_conduct/","title":"\u884c\u4e3a\u51c6\u5219","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/notes/contributing/","title":"MindYOLO \u8d21\u732e\u6307\u5357","text":""},{"location":"zh/notes/contributing/#_1","title":"\u8d21\u732e\u8005\u8bb8\u53ef\u534f\u8bae","text":"

\u9996\u6b21\u5411 MindYOLO \u793e\u533a\u63d0\u4ea4\u4ee3\u7801\u524d\uff0c\u9700\u7b7e\u7f72 CLA\u3002

\u4e2a\u4eba\u8d21\u732e\u8005\u8bf7\u53c2\u8003 ICLA \u5728\u7ebf\u6587\u6863 \u4e86\u89e3\u8be6\u7ec6\u4fe1\u606f\u3002

"},{"location":"zh/notes/contributing/#_2","title":"\u5165\u95e8\u6307\u5357","text":"
  • \u5728 Github \u4e0a Fork \u4ee3\u7801\u5e93\u3002
  • \u9605\u8bfb README.md\u3002
"},{"location":"zh/notes/contributing/#_3","title":"\u8d21\u732e\u6d41\u7a0b","text":""},{"location":"zh/notes/contributing/#_4","title":"\u4ee3\u7801\u98ce\u683c","text":"

\u8bf7\u9075\u5faa\u6b64\u98ce\u683c\uff0c\u4ee5\u4fbf MindYOLO \u6613\u4e8e\u5ba1\u67e5\u3001\u7ef4\u62a4\u548c\u5f00\u53d1\u3002

  • \u7f16\u7801\u6307\u5357

MindYOLO \u793e\u533a\u4f7f\u7528 Python PEP 8 \u7f16\u7801\u98ce\u683c \u5efa\u8bae\u7684 Python \u7f16\u7801\u98ce\u683c\u548c Google C++ \u7f16\u7801\u6307\u5357 \u5efa\u8bae\u7684 C++ \u7f16\u7801\u98ce\u683c\u3002 CppLint\u3001CppCheck\u3001CMakeLint\u3001CodeSpell\u3001Lizard\u3001ShellCheck \u548c PyLint \u7528\u4e8e\u68c0\u67e5\u4ee3\u7801\u683c\u5f0f\uff0c\u5efa\u8bae\u5728 IDE \u4e2d\u5b89\u88c5\u8fd9\u4e9b\u63d2\u4ef6\u3002

  • \u5355\u5143\u6d4b\u8bd5\u6307\u5357

MindYOLO \u793e\u533a\u4f7f\u7528 pytest \u5efa\u8bae\u7684 Python \u5355\u5143\u6d4b\u8bd5\u98ce\u683c\u548c Googletest Primer \u5efa\u8bae\u7684 C++ \u5355\u5143\u6d4b\u8bd5\u98ce\u683c\u3002\u6d4b\u8bd5\u7528\u4f8b\u7684\u8bbe\u8ba1\u610f\u56fe\u5e94\u8be5\u901a\u8fc7\u5176\u6ce8\u91ca\u540d\u79f0\u6765\u4f53\u73b0\u3002

  • \u91cd\u6784\u6307\u5357

\u6211\u4eec\u9f13\u52b1\u5f00\u53d1\u4eba\u5458\u91cd\u6784\u6211\u4eec\u7684\u4ee3\u7801\u4ee5\u6d88\u9664 \u4ee3\u7801\u5f02\u5473\u3002\u6240\u6709\u4ee3\u7801\u90fd\u5e94\u7b26\u5408\u7f16\u7801\u98ce\u683c\u548c\u6d4b\u8bd5\u98ce\u683c\u7684\u9700\u6c42\uff0c\u91cd\u6784\u4ee3\u7801\u4e5f\u4e0d\u4f8b\u5916\u3002Lizard \u5bf9 nloc\uff08\u65e0\u6ce8\u91ca\u7684\u4ee3\u7801\u884c\u6570\uff09\u7684\u9608\u503c\u4e3a 100\uff0c\u5bf9 cnc\uff08\u5faa\u73af\u590d\u6742\u5ea6\u6570\uff09\u7684\u9608\u503c\u4e3a 20\uff0c\u5f53\u60a8\u6536\u5230 Lizard \u8b66\u544a\u65f6\uff0c\u60a8\u5fc5\u987b\u91cd\u6784\u8981\u5408\u5e76\u7684\u4ee3\u7801\u3002

  • \u6587\u6863\u6307\u5357

\u6211\u4eec\u4f7f\u7528 MarkdownLint \u68c0\u67e5 markdown \u6587\u6863\u7684\u683c\u5f0f\u3002MindYOLO CI \u6839\u636e\u9ed8\u8ba4\u914d\u7f6e\u4fee\u6539\u4e86\u4ee5\u4e0b\u89c4\u5219\u3002

  • MD007（无序列表缩进）：indent 参数设置为 4，表示无序列表中的所有内容都需要使用四个空格进行缩进。
  • MD009（行末空格）：br_spaces 参数设置为 2，表示行末可以有0个或2个空格。
  • MD029（有序列表的序号）：style 参数设置为 ordered，表示有序列表的序号按升序排列。

\u5177\u4f53\u8bf7\u53c2\u89c1RULES\u3002

"},{"location":"zh/notes/contributing/#fork-pull","title":"Fork-Pull\u5f00\u53d1\u6a21\u5f0f","text":"
  • Fork MindYOLO\u4ed3\u5e93

\u5728\u5411MindYOLO\u9879\u76ee\u63d0\u4ea4\u4ee3\u7801\u4e4b\u524d\uff0c\u8bf7\u786e\u4fdd\u8be5\u9879\u76ee\u5df2\u7ecffork\u5230\u4f60\u81ea\u5df1\u7684\u4ed3\u5e93\u3002\u8fd9\u610f\u5473\u7740MindYOLO \u4ed3\u5e93\u548c\u4f60\u81ea\u5df1\u7684\u4ed3\u5e93\u4e4b\u95f4\u4f1a\u5e76\u884c\u5f00\u53d1\uff0c\u6240\u4ee5\u8981\u5c0f\u5fc3\u907f\u514d\u4e24\u8005\u4e0d\u4e00\u81f4\u3002

  • \u514b\u9686\u8fdc\u7a0b\u4ed3\u5e93

\u5982\u679c\u8981\u5c06\u4ee3\u7801\u4e0b\u8f7d\u5230\u672c\u5730\u673a\u5668\uff0cgit \u662f\u6700\u597d\u7684\u65b9\u5f0f\uff1a

# \u5bf9\u4e8e GitHub\ngit clone https://github.com/{insert_your_forked_repo}/mindyolo.git\ngit remote add upper https://github.com/mindspore-lab/mindyolo.git\n
  • \u672c\u5730\u5f00\u53d1\u4ee3\u7801

为避免多个分支之间不一致，建议签出到新分支：

git checkout -b {new_branch_name} origin/master\n

\u4ee5 master \u5206\u652f\u4e3a\u4f8b\uff0cMindYOLO \u53ef\u80fd\u4f1a\u6839\u636e\u9700\u8981\u521b\u5efa\u7248\u672c\u5206\u652f\u548c\u4e0b\u6e38\u5f00\u53d1\u5206\u652f\uff0c\u8bf7\u5148\u4fee\u590d\u4e0a\u6e38\u7684 bug\u3002 \u7136\u540e\u4f60\u53ef\u4ee5\u4efb\u610f\u66f4\u6539\u4ee3\u7801\u3002

  • \u5c06\u4ee3\u7801\u63a8\u9001\u5230\u8fdc\u7a0b\u4ed3\u5e93

\u66f4\u65b0\u4ee3\u7801\u540e\uff0c\u5e94\u4ee5\u6b63\u5f0f\u65b9\u5f0f\u63a8\u9001\u66f4\u65b0\uff1a

git add .\ngit status # \u68c0\u67e5\u66f4\u65b0\u72b6\u6001\ngit commit -m \"\u60a8\u7684\u63d0\u4ea4\u6807\u9898\"\ngit commit -s --amend #\u6dfb\u52a0\u63d0\u4ea4\u7684\u5177\u4f53\u63cf\u8ff0\ngit push origin {new_branch_name}\n
  • \u5c06\u8bf7\u6c42\u62c9\u53d6\u5230 MindYOLO \u4ed3\u5e93

\u6700\u540e\u4e00\u6b65\uff0c\u60a8\u9700\u8981\u5c06\u65b0\u5206\u652f\u4e0e MindYOLO master \u5206\u652f\u8fdb\u884c\u6bd4\u8f83\u3002\u5b8c\u6210\u62c9\u53d6\u8bf7\u6c42\u540e\uff0cJenkins CI \u5c06\u81ea\u52a8\u8bbe\u7f6e\u4e3a\u6784\u5efa\u6d4b\u8bd5\u3002\u60a8\u7684\u62c9\u53d6\u8bf7\u6c42\u5e94\u5c3d\u5feb\u5408\u5e76\u5230\u4e0a\u6e38\u4e3b\u5206\u652f\u4e2d\uff0c\u4ee5\u964d\u4f4e\u5408\u5e76\u98ce\u9669\u3002

"},{"location":"zh/notes/contributing/#_5","title":"\u62a5\u544a\u95ee\u9898","text":"

\u4e3a\u9879\u76ee\u505a\u51fa\u8d21\u732e\u7684\u4e00\u79cd\u597d\u65b9\u6cd5\u662f\u5728\u9047\u5230\u95ee\u9898\u65f6\u53d1\u9001\u8be6\u7ec6\u62a5\u544a\u3002\u6211\u4eec\u59cb\u7ec8\u6b23\u8d4f\u5199\u5f97\u597d\u3001\u8be6\u5c3d\u7684\u9519\u8bef\u62a5\u544a\uff0c\u5e76\u4f1a\u4e3a\u6b64\u611f\u8c22\u60a8\uff01

\u62a5\u544a\u95ee\u9898\u65f6\uff0c\u8bf7\u53c2\u8003\u4ee5\u4e0b\u683c\u5f0f\uff1a

  • \u60a8\u4f7f\u7528\u7684\u662f\u54ea\u4e2a\u7248\u672c\u7684\u73af\u5883\uff08MindSpore\u3001os\u3001python\u3001MindYOLO \u7b49\uff09\uff1f
  • \u8fd9\u662f\u9519\u8bef\u62a5\u544a\u8fd8\u662f\u529f\u80fd\u8bf7\u6c42\uff1f
  • \u4ec0\u4e48\u7c7b\u578b\u7684\u95ee\u9898\uff0c\u8bf7\u6dfb\u52a0\u6807\u7b7e\u4ee5\u5728\u95ee\u9898\u4eea\u8868\u677f\u4e0a\u7a81\u51fa\u663e\u793a\u5b83\u3002
  • \u53d1\u751f\u4e86\u4ec0\u4e48\uff1f
  • \u60a8\u671f\u671b\u53d1\u751f\u4ec0\u4e48\uff1f
  • \u5982\u4f55\u91cd\u73b0\u5b83\uff1f\uff08\u5c3d\u53ef\u80fd\u7b80\u77ed\u548c\u51c6\u786e\uff09
  • \u7ed9\u5ba1\u9605\u8005\u7684\u7279\u522b\u8bf4\u660e\uff1f

\u95ee\u9898\u54a8\u8be2\uff1a

  • \u5982\u679c\u60a8\u53d1\u73b0\u4e00\u4e2a\u672a\u5173\u95ed\u7684\u95ee\u9898\uff0c\u800c\u8fd9\u6b63\u662f\u60a8\u8981\u89e3\u51b3\u7684\u95ee\u9898\uff0c \u8bf7\u5728\u8be5\u95ee\u9898\u4e0a\u53d1\u8868\u4e00\u4e9b\u8bc4\u8bba\uff0c\u544a\u8bc9\u5176\u4ed6\u4eba\u60a8\u5c06\u8d1f\u8d23\u8be5\u95ee\u9898\u3002
  • \u5982\u679c\u95ee\u9898\u6253\u5f00\u4e86\u4e00\u6bb5\u65f6\u95f4\uff0c \u5efa\u8bae\u8d21\u732e\u8005\u5728\u89e3\u51b3\u8be5\u95ee\u9898\u4e4b\u524d\u8fdb\u884c\u9884\u68c0\u67e5\u3002
  • \u5982\u679c\u60a8\u89e3\u51b3\u4e86\u81ea\u5df1\u62a5\u544a\u7684\u95ee\u9898\uff0c \u4e5f\u9700\u8981\u5728\u5173\u95ed\u8be5\u95ee\u9898\u4e4b\u524d\u901a\u77e5\u5176\u4ed6\u4eba\u3002
  • \u5982\u679c\u60a8\u5e0c\u671b\u95ee\u9898\u5c3d\u5feb\u5f97\u5230\u56de\u590d\uff0c \u8bf7\u5c1d\u8bd5\u4e3a\u5176\u6dfb\u52a0\u6807\u7b7e\uff0c\u60a8\u53ef\u4ee5\u5728 \u6807\u7b7e\u5217\u8868 \u4e0a\u627e\u5230\u5404\u79cd\u6807\u7b7e
"},{"location":"zh/notes/contributing/#pr","title":"\u63d0\u51fa PR","text":"
  • \u5728 GitHub \u4e0a\u4ee5 issue \u5f62\u5f0f\u63d0\u51fa\u60a8\u7684\u60f3\u6cd5

  • \u5982\u679c\u662f\u9700\u8981\u5927\u91cf\u8bbe\u8ba1\u7ec6\u8282\u7684\u65b0\u529f\u80fd\uff0c\u8fd8\u5e94\u63d0\u4ea4\u8bbe\u8ba1\u63d0\u6848\u3002

  • \u5728\u95ee\u9898\u8ba8\u8bba\u548c\u8bbe\u8ba1\u63d0\u6848\u5ba1\u67e5\u4e2d\u8fbe\u6210\u5171\u8bc6\u540e\uff0c\u5b8c\u6210\u5206\u53c9\u4ed3\u5e93\u7684\u5f00\u53d1\u5e76\u63d0\u4ea4 PR\u3002

  • \u4efb\u4f55 PR \u90fd\u5fc5\u987b\u6536\u5230\u6765\u81ea\u6279\u51c6\u8005\u7684 2+ LGTM \u624d\u80fd\u88ab\u5141\u8bb8\u3002\u8bf7\u6ce8\u610f\uff0c\u6279\u51c6\u8005\u4e0d\u5f97\u5728\u81ea\u5df1\u7684 PR \u4e0a\u6dfb\u52a0 LGTM\u3002

  • PR \u7ecf\u8fc7\u5145\u5206\u8ba8\u8bba\u540e\uff0c\u5c06\u6839\u636e\u8ba8\u8bba\u7ed3\u679c\u8fdb\u884c\u5408\u5e76\u3001\u653e\u5f03\u6216\u62d2\u7edd\u3002

PR \u5efa\u8bae\uff1a

  • \u5e94\u907f\u514d\u4efb\u4f55\u4e0d\u76f8\u5173\u7684\u66f4\u6539\u3002
  • \u786e\u4fdd\u60a8\u7684\u63d0\u4ea4\u5386\u53f2\u8bb0\u5f55\u6709\u5e8f\u3002
  • \u59cb\u7ec8\u8ba9\u60a8\u7684\u5206\u652f\u4e0e\u4e3b\u5206\u652f\u4fdd\u6301\u4e00\u81f4\u3002
  • \u5bf9\u4e8e\u9519\u8bef\u4fee\u590d PR\uff0c\u8bf7\u786e\u4fdd\u6240\u6709\u76f8\u5173\u95ee\u9898\u90fd\u5df2\u94fe\u63a5\u3002
"},{"location":"zh/notes/faq/","title":"\u5e38\u89c1\u95ee\u9898","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/reference/data/","title":"\u6570\u636e","text":""},{"location":"zh/reference/data/#_2","title":"\u6570\u636e\u52a0\u8f7d","text":""},{"location":"zh/reference/data/#_3","title":"\u6570\u636e\u96c6","text":""},{"location":"zh/reference/models/","title":"\u6a21\u578b","text":""},{"location":"zh/reference/models/#_2","title":"\u521b\u5efa\u6a21\u578b","text":""},{"location":"zh/tutorials/configuration/","title":"\u914d\u7f6e","text":"

MindYOLO套件同时支持yaml文件参数和命令行参数解析：相对固定、与模型强相关、较为复杂或含有嵌套结构的参数编写在yaml文件中；需要根据实际应用场景更改的参数或较为简单的参数，则通过命令行传入。

\u4e0b\u9762\u4ee5yolov3\u4e3a\u4f8b\uff0c\u89e3\u91ca\u5982\u4f55\u914d\u7f6e\u76f8\u5e94\u7684\u53c2\u6570\u3002

"},{"location":"zh/tutorials/configuration/#_2","title":"\u53c2\u6570\u7ee7\u627f\u5173\u7cfb","text":"

\u53c2\u6570\u4f18\u5148\u7ea7\u7531\u9ad8\u5230\u4f4e\u5982\u4e0b\uff0c\u51fa\u73b0\u540c\u540d\u53c2\u6570\u65f6\uff0c\u4f4e\u4f18\u5148\u7ea7\u53c2\u6570\u4f1a\u88ab\u9ad8\u4f18\u5148\u7ea7\u53c2\u6570\u8986\u76d6

  • \u7528\u6237\u547d\u4ee4\u884c\u4f20\u5165\u53c2\u6570
  • python\u6267\u884cpy\u6587\u4ef6\u4e2dparser\u7684\u9ed8\u8ba4\u53c2\u6570
  • \u547d\u4ee4\u884c\u4f20\u5165config\u53c2\u6570\u5bf9\u5e94\u7684yaml\u6587\u4ef6\u53c2\u6570
  • \u547d\u4ee4\u884c\u4f20\u5165config\u53c2\u6570\u5bf9\u5e94\u7684yaml\u6587\u4ef6\u4e2d__BASE__\u53c2\u6570\u4e2d\u5305\u542b\u7684yaml\u6587\u4ef6\u53c2\u6570\uff0c\u4f8b\u5982yolov3.yaml\u542b\u6709\u5982\u4e0b\u53c2\u6570\uff1a
    __BASE__: [\n'../coco.yaml',\n'./hyp.scratch.yaml',\n]\n
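为帮助理解上述覆盖关系，下面给出一个最小的参数合并示意（merge_config 及其入参均为演示用的假设名称，并非 MindYOLO 的实际实现）：

# 按"低优先级先写入、高优先级后覆盖"的顺序合并参数（简化示意，非 MindYOLO 源码）
def merge_config(base_yaml_cfg, config_yaml_cfg, parser_defaults, cli_args):
    cfg = {}
    # 依次为：__BASE__ 中的 yaml、--config 指定的 yaml、parser 默认参数、命令行显式传入参数
    for source in (base_yaml_cfg, config_yaml_cfg, parser_defaults, cli_args):
        cfg.update(source)
    return cfg

# 示例：命令行传入的 epochs 覆盖 yaml 中的同名参数
print(merge_config({"epochs": 300}, {"epochs": 500}, {}, {"epochs": 100}))  # {'epochs': 100}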
"},{"location":"zh/tutorials/configuration/#_3","title":"\u57fa\u7840\u53c2\u6570","text":""},{"location":"zh/tutorials/configuration/#_4","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • device_target: \u6240\u7528\u8bbe\u5907\uff0cAscend/GPU/CPU
  • save_dir: \u8fd0\u884c\u7ed3\u679c\u4fdd\u5b58\u8def\u5f84\uff0c\u9ed8\u8ba4\u4e3a./runs
  • log_interval: \u6253\u5370\u65e5\u5fd7step\u95f4\u9694\uff0c\u9ed8\u8ba4\u4e3a100
  • is_parallel: \u662f\u5426\u5206\u5e03\u5f0f\u8bad\u7ec3\uff0c\u9ed8\u8ba4\u4e3aFalse
  • ms_mode: \u4f7f\u7528\u9759\u6001\u56fe\u6a21\u5f0f(0)\u6216\u52a8\u6001\u56fe\u6a21\u5f0f(1)\uff0c\u9ed8\u8ba4\u4e3a0\u3002
  • config: yaml\u914d\u7f6e\u6587\u4ef6\u8def\u5f84
  • per_batch_size: \u6bcf\u5f20\u5361batch size\uff0c\u9ed8\u8ba4\u4e3a32
  • epochs: \u8bad\u7ec3epoch\u6570\uff0c\u9ed8\u8ba4\u4e3a300
  • ...
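其中 ms_mode 与 device_target 最终会作用到 MindSpore 的运行上下文上，下面是两者对应关系的简化示意（set_default_context 为演示用的假设函数名，并非 MindYOLO 接口）：

# ms_mode/device_target 与 MindSpore 上下文的对应关系示意（非 MindYOLO 源码）
import mindspore as ms

def set_default_context(ms_mode: int = 0, device_target: str = "Ascend"):
    mode = ms.GRAPH_MODE if ms_mode == 0 else ms.PYNATIVE_MODE  # 0: 静态图, 1: 动态图
    ms.set_context(mode=mode, device_target=device_target)

set_default_context(ms_mode=0, device_target="Ascend")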
"},{"location":"zh/tutorials/configuration/#parse","title":"parse\u53c2\u6570\u8bbe\u7f6e","text":"

\u8be5\u90e8\u5206\u53c2\u6570\u901a\u5e38\u7531\u547d\u4ee4\u884c\u4f20\u5165\uff0c\u793a\u4f8b\u5982\u4e0b\uff1a

mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True --log_interval 50\n
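这些命令行参数在 train.py 中由 argparse 解析，下面给出一个简化示意（仅列出部分参数，str2bool 为演示用的辅助函数，与实际实现可能不同）：

# 命令行参数解析的简化示意（仅列出部分参数，非 train.py 的完整实现）
import argparse

def str2bool(v):
    return str(v).lower() in ("1", "true", "yes")

parser = argparse.ArgumentParser("MindYOLO train (示意)")
parser.add_argument("--config", type=str, default="", help="yaml 配置文件路径")
parser.add_argument("--device_target", type=str, default="Ascend", help="Ascend/GPU/CPU")
parser.add_argument("--is_parallel", type=str2bool, default=False, help="是否分布式训练")
parser.add_argument("--log_interval", type=int, default=100, help="打印日志的 step 间隔")
args = parser.parse_args()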
"},{"location":"zh/tutorials/configuration/#_5","title":"\u6570\u636e\u96c6","text":""},{"location":"zh/tutorials/configuration/#_6","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • dataset_name: \u6570\u636e\u96c6\u540d\u79f0
  • train_set: \u8bad\u7ec3\u96c6\u6240\u5728\u8def\u5f84
  • val_set: \u9a8c\u8bc1\u96c6\u6240\u5728\u8def\u5f84
  • test_set: \u6d4b\u8bd5\u96c6\u6240\u5728\u8def\u5f84
  • nc: \u6570\u636e\u96c6\u7c7b\u522b\u6570
  • names: \u7c7b\u522b\u540d\u79f0
  • ...
"},{"location":"zh/tutorials/configuration/#yaml","title":"yaml\u6587\u4ef6\u6837\u4f8b","text":"

\u8be5\u90e8\u5206\u53c2\u6570\u5728configs/coco.yaml\u4e2d\u5b9a\u4e49\uff0c\u901a\u5e38\u9700\u4fee\u6539\u5176\u4e2d\u7684\u6570\u636e\u96c6\u8def\u5f84

data:\ndataset_name: coco\n\ntrain_set: ./coco/train2017.txt  # 118287 images\nval_set: ./coco/val2017.txt  # 5000 images\ntest_set: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794\n\nnc: 80\n\n# class names\nnames: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',\n'hair drier', 'toothbrush' ]\n
"},{"location":"zh/tutorials/configuration/#_7","title":"\u6570\u636e\u589e\u5f3a","text":""},{"location":"zh/tutorials/configuration/#_8","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • num_parallel_workers: \u8bfb\u53d6\u6570\u636e\u7684\u5de5\u4f5c\u8fdb\u7a0b\u6570
  • train_transforms: 训练过程数据增强
  • test_transforms: 验证过程数据增强
  • ...
"},{"location":"zh/tutorials/configuration/#yaml_1","title":"yaml\u6587\u4ef6\u6837\u4f8b","text":"

该部分参数在configs/yolov3/hyp.scratch.yaml中定义，其中train_transforms和test_transforms均为由字典组成的列表，各字典包含数据增强操作名称、发生概率及该增强方法相关的参数。

data:\nnum_parallel_workers: 4\n\ntrain_transforms:\n- { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.9 }\n- { func_name: mixup, prob: 0.1, alpha: 8.0, beta: 8.0, needed_mosaic: True }\n- { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }\n- { func_name: label_norm, xyxy2xywh_: True }\n- { func_name: albumentations }\n- { func_name: fliplr, prob: 0.5 }\n- { func_name: label_pad, padding_size: 160, padding_value: -1 }\n- { func_name: image_norm, scale: 255. }\n- { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }\n\ntest_transforms:\n- { func_name: letterbox, scaleup: False }\n- { func_name: label_norm, xyxy2xywh_: True }\n- { func_name: label_pad, padding_size: 160, padding_value: -1 }\n- { func_name: image_norm, scale: 255. }\n- { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }\n
"},{"location":"zh/tutorials/configuration/#_9","title":"\u6a21\u578b","text":""},{"location":"zh/tutorials/configuration/#_10","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • model_name: \u6a21\u578b\u540d\u79f0
  • depth_multiple: \u6a21\u578b\u6df1\u5ea6\u56e0\u5b50
  • width_multiple: \u6a21\u578b\u5bbd\u5ea6\u56e0\u5b50
  • stride: \u7279\u5f81\u56fe\u4e0b\u91c7\u6837\u500d\u6570
  • anchors: \u9884\u8bbe\u951a\u6846
  • backbone: \u6a21\u578b\u9aa8\u5e72\u7f51\u7edc
  • head: \u6a21\u578b\u68c0\u6d4b\u5934
"},{"location":"zh/tutorials/configuration/#yaml_2","title":"yaml\u6587\u4ef6\u6837\u4f8b","text":"

该部分参数在configs/yolov3/yolov3.yaml中定义，根据backbone和head参数进行网络构建。参数以嵌套列表的形式呈现，每行代表一层模块，包含4个参数，分别是输入层编号(-1代表上一层)、模块重复次数、模块名称和模块相应参数，构建逻辑的简化示意见下文示例代码。用户也可以不借助yaml文件而直接在py文件中定义和注册网络。

network:\nmodel_name: yolov3\n\ndepth_multiple: 1.0  # model depth multiple\nwidth_multiple: 1.0  # layer channel multiple\nstride: [8, 16, 32]\nanchors:\n- [10,13, 16,30, 33,23]  # P3/8\n- [30,61, 62,45, 59,119]  # P4/16\n- [116,90, 156,198, 373,326]  # P5/32\n\n# darknet53 backbone\nbackbone:\n# [from, number, module, args]\n[[-1, 1, ConvNormAct, [32, 3, 1]],  # 0\n[-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2\n[-1, 1, Bottleneck, [64]],\n[-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4\n[-1, 2, Bottleneck, [128]],\n[-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8\n[-1, 8, Bottleneck, [256]],\n[-1, 1, ConvNormAct, [512, 3, 2]],  # 7-P4/16\n[-1, 8, Bottleneck, [512]],\n[-1, 1, ConvNormAct, [1024, 3, 2]],  # 9-P5/32\n[-1, 4, Bottleneck, [1024]],  # 10\n]\n\n# YOLOv3 head\nhead:\n[[-1, 1, Bottleneck, [1024, False]],\n[-1, 1, ConvNormAct, [512, 1, 1]],\n[-1, 1, ConvNormAct, [1024, 3, 1]],\n[-1, 1, ConvNormAct, [512, 1, 1]],\n[-1, 1, ConvNormAct, [1024, 3, 1]],  # 15 (P5/32-large)\n\n[-2, 1, ConvNormAct, [256, 1, 1]],\n[-1, 1, Upsample, [None, 2, 'nearest']],\n[[-1, 8], 1, Concat, [1]],  # cat backbone P4\n[-1, 1, Bottleneck, [512, False]],\n[-1, 1, Bottleneck, [512, False]],\n[-1, 1, ConvNormAct, [256, 1, 1]],\n[-1, 1, ConvNormAct, [512, 3, 1]],  # 22 (P4/16-medium)\n\n[-2, 1, ConvNormAct, [128, 1, 1]],\n[-1, 1, Upsample, [None, 2, 'nearest']],\n[[-1, 6], 1, Concat, [1]],  # cat backbone P3\n[-1, 1, Bottleneck, [256, False]],\n[-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)\n\n[[27, 22, 15], 1, YOLOv3Head, [nc, anchors, stride]],   # Detect(P3, P4, P5)\n]\n
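为帮助理解按 [输入层编号, 重复次数, 模块名, 参数] 逐行建网的方式，下面给出解析逻辑的极简示意（build_layers、module_registry 均为演示用的假设名称，并非 MindYOLO 源码）：

# 按 [from, number, module, args] 逐行实例化模块的简化示意（非 MindYOLO 源码）
def build_layers(cfg_rows, module_registry):
    layers = []
    for from_idx, number, module_name, args in cfg_rows:
        block_cls = module_registry[module_name]             # 例如 ConvNormAct、Bottleneck
        blocks = [block_cls(*args) for _ in range(number)]   # number 为模块重复次数
        layers.append({"from": from_idx, "blocks": blocks})  # from 为输入层编号，-1 表示上一层
    return layers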
"},{"location":"zh/tutorials/configuration/#_11","title":"\u635f\u5931\u51fd\u6570","text":""},{"location":"zh/tutorials/configuration/#_12","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • name: \u635f\u5931\u51fd\u6570\u540d\u79f0
  • box: box\u635f\u5931\u6743\u91cd
  • cls: class\u635f\u5931\u6743\u91cd
  • cls_pw: class\u635f\u5931\u6b63\u6837\u672c\u6743\u91cd
  • obj: object\u635f\u5931\u6743\u91cd
  • obj_pw: object\u635f\u5931\u6b63\u6837\u672c\u6743\u91cd
  • fl_gamma: focal loss gamma
  • anchor_t: anchor shape\u6bd4\u4f8b\u9608\u503c
  • label_smoothing: \u6807\u7b7e\u5e73\u6ed1\u503c
"},{"location":"zh/tutorials/configuration/#yaml_3","title":"yaml\u6587\u4ef6\u6837\u4f8b","text":"

\u8be5\u90e8\u5206\u53c2\u6570\u5728configs/yolov3/hyp.scratch.yaml\u4e2d\u5b9a\u4e49

loss:\nname: YOLOv7Loss\nbox: 0.05  # box loss gain\ncls: 0.5  # cls loss gain\ncls_pw: 1.0  # cls BCELoss positive_weight\nobj: 1.0  # obj loss gain (scale with pixels)\nobj_pw: 1.0  # obj BCELoss positive_weight\nfl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)\nanchor_t: 4.0  # anchor-multiple threshold\nlabel_smoothing: 0.0 # label smoothing epsilon\n
"},{"location":"zh/tutorials/configuration/#_13","title":"\u4f18\u5316\u5668","text":""},{"location":"zh/tutorials/configuration/#_14","title":"\u53c2\u6570\u8bf4\u660e","text":"
  • optimizer: \u4f18\u5316\u5668\u540d\u79f0\u3002
  • lr_init: \u5b66\u4e60\u7387\u521d\u59cb\u503c
  • warmup_epochs: warmup epoch\u6570
  • warmup_momentum: warmup momentum\u521d\u59cb\u503c
  • warmup_bias_lr: warmup bias\u5b66\u4e60\u7387\u521d\u59cb\u503c
  • min_warmup_step: \u6700\u5c0fwarmup step\u6570
  • group_param: \u53c2\u6570\u5206\u7ec4\u7b56\u7565
  • gp_weight_decay: \u5206\u7ec4\u53c2\u6570\u6743\u91cd\u8870\u51cf\u7cfb\u6570
  • start_factor: \u521d\u59cb\u5b66\u4e60\u7387\u56e0\u6570
  • end_factor: \u7ed3\u675f\u5b66\u4e60\u7387\u56e0\u6570
  • momentum\uff1a\u79fb\u52a8\u5e73\u5747\u7684\u52a8\u91cf
  • loss_scale\uff1aloss\u7f29\u653e\u7cfb\u6570
  • nesterov\uff1a\u662f\u5426\u4f7f\u7528Nesterov Accelerated Gradient (NAG)\u7b97\u6cd5\u66f4\u65b0\u68af\u5ea6\u3002
"},{"location":"zh/tutorials/configuration/#yaml_4","title":"yaml\u6587\u4ef6\u6837\u4f8b","text":"

\u8be5\u90e8\u5206\u53c2\u6570\u5728configs/yolov3/hyp.scratch.yaml\u4e2d\u5b9a\u4e49\uff0c\u5982\u4e0b\u793a\u4f8b\u4e2d\u7ecf\u8fc7warmup\u9636\u6bb5\u540e\u7684\u521d\u59cb\u5b66\u4e60\u7387\u4e3alr_init * start_factor = 0.01 * 1.0 = 0.01, \u6700\u7ec8\u5b66\u4e60\u7387\u4e3alr_init * end_factor = 0.01 * 0.01 = 0.0001

optimizer:\noptimizer: momentum\nlr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)\nmomentum: 0.937  # SGD momentum/Adam beta1\nnesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm\nloss_scale: 1.0 # loss scale for optimizer\nwarmup_epochs: 3  # warmup epochs (fractions ok)\nwarmup_momentum: 0.8  # warmup initial momentum\nwarmup_bias_lr: 0.1  # warmup initial bias lr\nmin_warmup_step: 1000 # minimum warmup step\ngroup_param: yolov7 # group param strategy\ngp_weight_decay: 0.0005  # group param weight decay 5e-4\nstart_factor: 1.0\nend_factor: 0.01\n
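下面用一小段 Python 演示 start_factor/end_factor 的作用，即学习率从 lr_init*start_factor 线性衰减到 lr_init*end_factor（linear_lr 为演示用的假设函数，实际调度由 MindYOLO 内部实现）：

# 线性学习率调度的简化示意（非 MindYOLO 源码）
def linear_lr(lr_init, start_factor, end_factor, total_steps):
    lrs = []
    for step in range(total_steps):
        factor = start_factor + (end_factor - start_factor) * step / max(total_steps - 1, 1)
        lrs.append(lr_init * factor)
    return lrs

lrs = linear_lr(0.01, 1.0, 0.01, total_steps=300)
print(round(lrs[0], 6), round(lrs[-1], 6))   # 0.01 0.0001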
"},{"location":"zh/tutorials/data_augmentation/","title":"\u6570\u636e\u589e\u5f3a","text":""},{"location":"zh/tutorials/data_augmentation/#_2","title":"\u5957\u4ef6\u81ea\u5e26\u7684\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u6e05\u5355","text":"\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u540d \u6982\u8981\u89e3\u91ca mosaic \u968f\u673a\u9009\u62e9mosaic4\u548cmosaic9 mosaic4 4\u5206\u683c\u62fc\u63a5 mosaic9 9\u5206\u683c\u62fc\u63a5 mixup \u5bf9\u4e24\u4e2a\u56fe\u50cf\u8fdb\u884c\u7ebf\u6027\u6df7\u5408 pastein \u526a\u8d34\u589e\u5f3a random_perspective \u968f\u673a\u900f\u89c6\u53d8\u6362 hsv_augment \u968f\u673a\u989c\u8272\u53d8\u6362 fliplr \u6c34\u5e73\u7ffb\u8f6c flipud \u5782\u76f4\u7ffb\u8f6c letterbox \u7f29\u653e\u548c\u586b\u5145 label_norm \u6807\u7b7e\u5f52\u4e00\u5316 \u5750\u6807\u5f52\u4e00\u5316\u52300-1\u5230\u8303\u56f4 label_pad \u5c06\u6807\u7b7e\u4fe1\u606f\u586b\u5145\u4e3a\u56fa\u5b9a\u5927\u5c0f\u7684\u6570\u7ec4 image_norm \u56fe\u50cf\u6570\u636e\u6807\u51c6\u5316 image_transpose \u901a\u9053\u8f6c\u7f6e\u548c\u7ef4\u5ea6\u8f6c\u7f6e albumentations albumentations\u6570\u636e\u589e\u5f3a

\u8fd9\u4e9b\u6570\u636e\u589e\u5f3a\u51fd\u6570\u5b9a\u4e49\u5728 mindyolo/data/dataset.py \u4e2d\u3002

"},{"location":"zh/tutorials/data_augmentation/#_3","title":"\u4f7f\u7528\u65b9\u6cd5","text":"

MindYOLO\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u901a\u8fc7\u5728yaml\u6587\u4ef6\u91cc\u914d\u7f6e\u3002\u4f8b\u5982\uff0c\u8bad\u7ec3\u8fc7\u7a0b\u6dfb\u52a0\u4e00\u4e2a\u6570\u636e\u589e\u5f3a\uff0c\u9700\u8981\u5728yaml\u6587\u4ef6data.train_transforms\u5b57\u6bb5\u4e0b\u6dfb\u52a0\u4e00\u4e2a\u5b57\u5178\u5217\u8868\uff0c\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u81ea\u4e0a\u800c\u4e0b\u4f9d\u6b21\u7f57\u5217\u3002

\u4e00\u4e2a\u5178\u578b\u7684\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u914d\u7f6e\u5b57\u5178\u91cc\u5fc5\u987b\u6709func_name\uff0c\u8868\u793a\u5e94\u7528\u7684\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u540d\uff0c\u800c\u540e\u7f57\u5217\u8be5\u65b9\u6cd5\u9700\u8981\u8bbe\u7f6e\u7684\u53c2\u6570\uff0c\u82e5\u6ca1\u6709\u5728\u6570\u636e\u589e\u5f3a\u914d\u7f6e\u5b57\u5178\u4e2d\u914d\u7f6e\u53c2\u6570\u9879\uff0c\u5219\u4f1a\u9009\u62e9\u8be5\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u9ed8\u8ba4\u7684\u6570\u503c\u3002

\u6570\u636e\u589e\u5f3a\u901a\u7528\u914d\u7f6e\u5b57\u5178\uff1a

- {func_name: \u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u540d1, args11=x11, args12=x12, ..., args1n=x1n}\n- {func_name: \u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u540d2, args21=x21, args22=x22, ..., args2n=x2n}\n...\n- {func_name: \u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u540dn, argsn1=xn1, argsn2=xn2, ..., argsnn=xnn}\n
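上述配置列表在数据管线中会被逐项读取并调用同名方法，下面是该分发过程的一个简化示意（apply_transforms 为演示用的假设函数名，并非 MindYOLO 源码）：

# 依次执行配置列表中数据增强方法的简化示意（非 MindYOLO 源码）
import random

def apply_transforms(dataset, image, labels, transforms_cfg):
    for item in transforms_cfg:
        item = dict(item)                      # 拷贝一份，避免修改原配置
        func = getattr(dataset, item.pop("func_name"))
        prob = item.pop("prob", 1.0)           # 未配置 prob 时默认为 1.0
        if random.random() < prob:
            image, labels = func(image, labels, **item)
    return image, labels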

\u4ee5YOLOv7\u8bad\u7ec3\u6570\u636e\u589e\u5f3a\u793a\u4f8b\uff1a

# \u6587\u4ef6\u76ee\u5f55\uff1aconfigs/yolov7/hyp.scratch.tiny.yaml (https://github.com/mindspore-lab/mindyolo/blob/master/configs/yolov7/hyp.scratch.tiny.yaml)\ntrain_transforms:\n- {func_name: mosaic, prob: 1.0, mosaic9_prob: 0.2, translate: 0.1, scale: 0.5}\n- {func_name: mixup, prob: 0.05, alpha: 8.0, beta: 8.0, needed_mosaic: True}\n- {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}\n- {func_name: pastein, prob: 0.05, num_sample: 30}\n- {func_name: label_norm, xyxy2xywh_: True}\n- {func_name: fliplr, prob: 0.5}\n- {func_name: label_pad, padding_size: 160, padding_value: -1}\n- {func_name: image_norm, scale: 255.}\n- {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}\n
注意：func_name表示数据增强方法名，prob、mosaic9_prob、translate、scale为该方法的参数。其中prob为所有方法均有的参数，表示该数据增强方法的执行概率，默认值为1。

\u4e0a\u8ff0yaml\u6587\u4ef6\u6267\u884c\u7684\u5177\u4f53\u64cd\u4f5c\u5982\u4e0b\uff1a

  • mosaic\uff1a\u4ee51.0\u7684\u6982\u7387\u5bf9\u8f93\u5165\u7684\u56fe\u7247\u8fdb\u884cmosaic\u64cd\u4f5c\uff0c\u5373\u5c064\u5f20\u4e0d\u540c\u7684\u56fe\u7247\u62fc\u63a5\u6210\u4e00\u5f20\u56fe\u7247\u3002mosaic9_prob\u8868\u793a\u4f7f\u75289\u5bab\u683c\u65b9\u5f0f\u8fdb\u884c\u62fc\u63a5\u7684\u6982\u7387\uff0ctranslate\u548cscale\u5206\u522b\u8868\u793a\u968f\u673a\u5e73\u79fb\u548c\u7f29\u653e\u7684\u7a0b\u5ea6\u3002 \u5982\u56fe\u6240\u793a\uff1a

  • mixup\uff1a\u4ee50.05\u7684\u6982\u7387\u5bf9\u8f93\u5165\u7684\u56fe\u7247\u8fdb\u884cmixup\u64cd\u4f5c\uff0c\u5373\u5c06\u4e24\u5f20\u4e0d\u540c\u7684\u56fe\u7247\u8fdb\u884c\u6df7\u5408\u3002\u5176\u4e2dalpha\u548cbeta\u8868\u793a\u6df7\u5408\u7cfb\u6570\uff0cneeded_mosaic\u8868\u793a\u662f\u5426\u9700\u8981\u4f7f\u7528mosaic\u8fdb\u884c\u6df7\u5408\u3002

  • hsv_augment: HSV\u589e\u5f3a, \u4ee51.0\u7684\u6982\u7387\u5bf9\u8f93\u5165\u7684\u56fe\u7247\u8fdb\u884cHSV\u989c\u8272\u7a7a\u95f4\u7684\u8c03\u6574\uff0c\u589e\u52a0\u6570\u636e\u591a\u6837\u6027\u3002\u5176\u4e2dhgain\u3001sgain\u548cvgain\u5206\u522b\u8868\u793a\u5bf9H\u3001S\u3001V\u901a\u9053\u7684\u8c03\u6574\u7a0b\u5ea6\u3002

  • pastein\uff1a\u4ee50.05\u7684\u6982\u7387\u5728\u8f93\u5165\u7684\u56fe\u7247\u4e2d\u968f\u673a\u8d34\u5165\u4e00\u4e9b\u6837\u672c\u3002\u5176\u4e2dnum_sample\u8868\u793a\u968f\u673a\u8d34\u5165\u7684\u6837\u672c\u6570\u91cf\u3002

  • label_norm\uff1a\u5c06\u8f93\u5165\u7684\u6807\u7b7e\u4ece(x1, y1, x2, y2)\u7684\u683c\u5f0f\u8f6c\u6362\u4e3a(x, y, w, h)\u7684\u683c\u5f0f\u3002

  • fliplr\uff1a\u4ee50.5\u7684\u6982\u7387\u5bf9\u8f93\u5165\u7684\u56fe\u7247\u8fdb\u884c\u6c34\u5e73\u7ffb\u8f6c\uff0c\u589e\u52a0\u6570\u636e\u591a\u6837\u6027\u3002

  • label_pad\uff1a\u5bf9\u8f93\u5165\u7684\u6807\u7b7e\u8fdb\u884c\u586b\u5145\uff0c\u4f7f\u5f97\u6bcf\u4e2a\u56fe\u7247\u90fd\u6709\u76f8\u540c\u6570\u91cf\u7684\u6807\u7b7e\u3002padding_size\u8868\u793a\u586b\u5145\u540e\u6807\u7b7e\u7684\u6570\u91cf\uff0cpadding_value\u8868\u793a\u586b\u5145\u7684\u503c\u3002

  • image_norm\uff1a\u5c06\u8f93\u5165\u7684\u56fe\u7247\u50cf\u7d20\u503c\u4ece[0, 255]\u8303\u56f4\u5185\u7f29\u653e\u5230[0, 1]\u8303\u56f4\u5185\u3002

  • image_transpose\uff1a\u5c06\u8f93\u5165\u7684\u56fe\u7247\u4eceBGR\u683c\u5f0f\u8f6c\u6362\u4e3aRGB\u683c\u5f0f\uff0c\u5e76\u5c06\u56fe\u7247\u7684\u901a\u9053\u6570\u4eceHWC\u683c\u5f0f\u8f6c\u6362\u4e3aCHW\u683c\u5f0f\u3002

\u6d4b\u8bd5\u6570\u636e\u589e\u5f3a\u9700\u8981\u7528test_transforms\u5b57\u6bb5\u6807\u6ce8\uff0c\u914d\u7f6e\u65b9\u6cd5\u540c\u8bad\u7ec3\u3002

"},{"location":"zh/tutorials/data_augmentation/#_4","title":"\u81ea\u5b9a\u4e49\u6570\u636e\u589e\u5f3a","text":"

\u7f16\u5199\u6307\u5357\uff1a

  • \u5728mindyolo/data/dataset.py\u6587\u4ef6COCODataset\u7c7b\u4e2d\u6dfb\u52a0\u81ea\u5b9a\u4e49\u6570\u636e\u589e\u5f3a\u65b9\u6cd5
  • \u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u7684\u8f93\u5165\u901a\u5e38\u5305\u542b\u56fe\u7247\u3001\u6807\u7b7e\u548c\u81ea\u5b9a\u4e49\u53c2\u6570\u3002
  • \u7f16\u5199\u51fd\u6570\u4f53\u5185\u5bb9\uff0c\u81ea\u5b9a\u4e49\u8f93\u51fa

\u4e00\u4e2a\u5178\u578b\u7684\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\uff1a

#\u5728mindyolo/data/dataset.py COCODataset \u6dfb\u52a0\u5b50\u65b9\u6cd5\n    def data_trans_func(self, image, labels, args1=x1, args2=x2, ..., argsn=xn):\n        # \u6570\u636e\u589e\u5f3a\u903b\u8f91\n        ......\n        return image, labels\n
\u81ea\u5b9a\u4e49\u4e00\u4e2a\u529f\u80fd\u4e3a\u65cb\u8f6c\u7684\u6570\u636e\u589e\u5f3a\u51fd\u6570
#mindyolo/data/dataset.py\n    def rotate(self, image, labels, angle):\n        # rotate image\n        image = np.rot90(image, angle // 90)\n        if len(labels):\n            if angle == 90:\n                labels[:, 0], labels[:, 1] = 1 - labels[:, 1], labels[:, 0]\n            elif angle == 180:\n                labels[:, 0], labels[:, 1] = 1 - labels[:, 0], 1 - labels[:, 1]\n            elif angle == 270:\n                labels[:, 0], labels[:, 1] = labels[:, 1], 1 - labels[:, 0]\n        return image, labels\n

\u4f7f\u7528\u6307\u5357\uff1a - \u5728\u6a21\u578b\u7684yaml\u6587\u4ef6\u4e2d\uff0c\u4ee5\u5b57\u5178\u7684\u5f62\u5f0f\u5b9a\u4e49\u6b64\u6570\u636e\u589e\u5f3a\u65b9\u6cd5\u3002\u4e0e\u4e0a\u6587\u6240\u8ff0\u7528\u6cd5\u4e00\u81f4

    - {func_name: rotate, angle: 90}\n

\u6548\u679c\u5c55\u793a\uff1a

"},{"location":"zh/tutorials/deployment/","title":"\u90e8\u7f72","text":""},{"location":"zh/tutorials/deployment/#_2","title":"\u4f9d\u8d56","text":"
pip install -r requirement.txt\n
"},{"location":"zh/tutorials/deployment/#mindspore-lite","title":"MindSpore Lite\u73af\u5883\u51c6\u5907","text":"

\u53c2\u8003\uff1aLite\u73af\u5883\u914d\u7f6e \u6ce8\u610f\uff1aMindSpore Lite\u9002\u914d\u7684python\u73af\u5883\u4e3a3.7\uff0c\u8bf7\u5728\u5b89\u88c5Lite\u524d\u51c6\u5907\u597dpython3.7\u7684\u73af\u5883

  1. \u6839\u636e\u73af\u5883\uff0c\u4e0b\u8f7d\u914d\u5957\u7684tar.gz\u5305\u548cwhl\u5305

  2. \u89e3\u538btar.gz\u5305\u5e76\u5b89\u88c5\u5bf9\u5e94\u7248\u672c\u7684whl\u5305

    tar -zxvf mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.tar.gz\npip install mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.whl\n

  3. \u914d\u7f6eLite\u7684\u73af\u5883\u53d8\u91cf LITE_HOME\u4e3atar.gz\u89e3\u538b\u51fa\u7684\u6587\u4ef6\u5939\u8def\u5f84\uff0c\u63a8\u8350\u4f7f\u7528\u7edd\u5bf9\u8def\u5f84
    export LITE_HOME=/path/to/mindspore-lite-{version}-{os}-{platform}\nexport LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH\nexport PATH=$LITE_HOME/tools/converter/converter:$LITE_HOME/tools/benchmark:$PATH\n
"},{"location":"zh/tutorials/deployment/#_3","title":"\u5feb\u901f\u5f00\u59cb","text":""},{"location":"zh/tutorials/deployment/#_4","title":"\u6a21\u578b\u8f6c\u6362","text":"

ckpt\u6a21\u578b\u8f6c\u4e3amindir\u6a21\u578b\uff0c\u6b64\u6b65\u9aa4\u53ef\u5728CPU/Ascend910\u4e0a\u8fd0\u884c

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target [CPU/Ascend]\ne.g.\n# \u5728CPU\u4e0a\u8fd0\u884c\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\n# \u5728Ascend\u4e0a\u8fd0\u884c\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target Ascend\n

"},{"location":"zh/tutorials/deployment/#_5","title":"\u811a\u672c\u8bf4\u660e","text":"
  • predict.py \u652f\u6301\u5355\u5f20\u56fe\u7247\u63a8\u7406
  • test.py \u652f\u6301COCO\u6570\u636e\u96c6\u63a8\u7406
  • \u6ce8\u610f\uff1a\u5f53\u524d\u53ea\u652f\u6301\u5728Ascend 310\u4e0a\u63a8\u7406
"},{"location":"zh/tutorials/deployment/#mindx","title":"MindX\u90e8\u7f72","text":""},{"location":"zh/tutorials/deployment/#_6","title":"\u73af\u5883\u914d\u7f6e","text":"

\u53c2\u8003\uff1aMindX\u73af\u5883\u51c6\u5907 \u6ce8\u610f\uff1aMindX\u76ee\u524d\u652f\u6301\u7684python\u7248\u672c\u4e3a3.9\uff0c\u8bf7\u5728\u5b89\u88c5MindX\u524d\uff0c\u51c6\u5907\u597dpython3.9\u7684\u73af\u5883

  1. \u5728MindX\u5b98\u7f51\u83b7\u53d6\u73af\u5883\u5b89\u88c5\u5305\uff0c\u76ee\u524d\u652f\u63013.0.0\u7248\u672cMindX\u63a8\u7406

  2. \u8df3\u8f6c\u81f3\u4e0b\u8f7d\u9875\u9762\u4e0b\u8f7dAscend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run

  3. \u5c06\u5b89\u88c5\u5305\u653e\u7f6e\u4e8eAscend310\u673a\u5668\u76ee\u5f55\u4e2d\u5e76\u89e3\u538b

  4. \u5982\u4e0d\u662froot\u7528\u6237\uff0c\u9700\u589e\u52a0\u5bf9\u5957\u4ef6\u5305\u7684\u53ef\u6267\u884c\u6743\u9650\uff1a

    chmod +x Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run\n

  5. \u8fdb\u5165\u5f00\u53d1\u5957\u4ef6\u5305\u7684\u4e0a\u4f20\u8def\u5f84\uff0c\u5b89\u88c5mxManufacture\u5f00\u53d1\u5957\u4ef6\u5305\u3002
    ./Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run --install\n
    \u5b89\u88c5\u5b8c\u6210\u540e\uff0c\u82e5\u51fa\u73b0\u5982\u4e0b\u56de\u663e\uff0c\u8868\u793a\u8f6f\u4ef6\u6210\u529f\u5b89\u88c5\u3002
    The installation is successful\n
    \u5b89\u88c5\u5b8c\u6210\u540e\uff0cmxManufacture\u8f6f\u4ef6\u76ee\u5f55\u7ed3\u6784\u5982\u4e0b\u6240\u793a\uff1a
    .\n\u251c\u2500\u2500 bin\n\u251c\u2500\u2500 config\n\u251c\u2500\u2500 filelist.txt\n\u251c\u2500\u2500 include\n\u251c\u2500\u2500 lib\n\u251c\u2500\u2500 opensource\n\u251c\u2500\u2500 operators\n\u251c\u2500\u2500 python\n\u251c\u2500\u2500 samples\n\u251c\u2500\u2500 set_env.sh\n\u251c\u2500\u2500 toolkit\n\u2514\u2500\u2500 version.info\n
  6. \u8fdb\u5165mxmanufacture\u7684\u5b89\u88c5\u76ee\u5f55\uff0c\u8fd0\u884c\u4ee5\u4e0b\u547d\u4ee4\uff0c\u4f7fMindX SDK\u73af\u5883\u53d8\u91cf\u751f\u6548\u3002
    source set_env.sh\n
  7. \u8fdb\u5165./mxVision-3.0.0/python/\uff0c\u5b89\u88c5mindx-3.0.0-py3-none-any.whl
    pip install mindx-3.0.0-py3-none-any.whl\n
"},{"location":"zh/tutorials/deployment/#_7","title":"\u6a21\u578b\u8f6c\u6362","text":"
  1. ckpt\u6a21\u578b\u8f6c\u4e3aair\u6a21\u578b\uff0c\u6b64\u6b65\u9aa4\u9700\u8981\u5728Ascend910\u4e0a\u64cd\u4f5c

    python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format AIR\ne.g.\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format AIR\n
    yolov7\u9700\u8981\u57282.0\u7248\u672c\u4ee5\u4e0a\u7684Ascend910\u673a\u5668\u8fd0\u884cexport

  2. air\u6a21\u578b\u8f6c\u4e3aom\u6a21\u578b\uff0c\u4f7f\u7528atc\u8f6c\u6362\u5de5\u5177\uff0c\u6b64\u6b65\u9aa4\u9700\u5b89\u88c5MindX\u73af\u5883\uff0c\u5728Ascend310\u4e0a\u8fd0\u884c

    atc --model=./path_to_air/weight.air --framework=1 --output=yolo  --soc_version=Ascend310\n

"},{"location":"zh/tutorials/deployment/#mindx-test","title":"MindX Test","text":"

\u5bf9COCO\u6570\u636e\u63a8\u7406\uff1a

python ./deploy/test.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml\ne.g.\npython ./deploy/test.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml\n

"},{"location":"zh/tutorials/deployment/#mindx-predict","title":"MindX Predict","text":"

\u5bf9\u5355\u5f20\u56fe\u7247\u63a8\u7406\uff1a

python ./deploy/predict.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython ./deploy/predict.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg\n

"},{"location":"zh/tutorials/deployment/#mindir","title":"MindIR\u90e8\u7f72","text":""},{"location":"zh/tutorials/deployment/#_8","title":"\u73af\u5883\u8981\u6c42","text":"

mindspore>=2.1

"},{"location":"zh/tutorials/deployment/#_9","title":"\u6ce8\u610f\u4e8b\u9879","text":"
  1. \u5f53\u524d\u4ec5\u652f\u6301Predict

  2. \u7406\u8bba\u4e0a\u4e5f\u53ef\u5728Ascend910\u4e0a\u8fd0\u884c\uff0c\u672a\u6d4b\u8bd5

"},{"location":"zh/tutorials/deployment/#_10","title":"\u6a21\u578b\u8f6c\u6362","text":"

ckpt\u6a21\u578b\u8f6c\u4e3amindir\u6a21\u578b\uff0c\u6b64\u6b65\u9aa4\u53ef\u5728CPU\u4e0a\u8fd0\u884c

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\ne.g.\n# \u5728CPU\u4e0a\u8fd0\u884c\npython ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU\n

"},{"location":"zh/tutorials/deployment/#mindir-test","title":"MindIR Test","text":"

\u656c\u8bf7\u671f\u5f85

"},{"location":"zh/tutorials/deployment/#mindir-predict","title":"MindIR Predict","text":"

\u5bf9\u5355\u5f20\u56fe\u7247\u63a8\u7406\uff1a

python ./deploy/predict.py --model_type MindIR --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython deploy/predict.py --model_type MindIR --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg\n

"},{"location":"zh/tutorials/deployment/#onnx","title":"ONNX\u90e8\u7f72","text":"

\u6ce8\u610f: \u4ec5\u90e8\u5206\u6a21\u578b\u652f\u6301\u5bfc\u51faONNX\u5e76\u4f7f\u7528ONNXRuntime\u8fdb\u884c\u90e8\u7f72

"},{"location":"zh/tutorials/deployment/#_11","title":"\u73af\u5883\u914d\u7f6e","text":"
pip install onnx>=1.9.0\npip install onnxruntime>=1.8.0\n
"},{"location":"zh/tutorials/deployment/#_12","title":"\u6ce8\u610f\u4e8b\u9879","text":"
  1. \u5f53\u524d\u5e76\u975e\u6240\u6709mindyolo\u5747\u652f\u6301ONNX\u5bfc\u51fa\u548c\u63a8\u7406\uff08\u4ec5\u4ee5YoloV3\u4e3a\u4f8b\uff09

  2. \u5f53\u524d\u4ec5\u652f\u6301Predict\u529f\u80fd

  3. \u5bfc\u51faONNX\u9700\u8981\u8c03\u6574nn.SiLU\u7b97\u5b50\uff0c\u91c7\u7528sigmoid\u7b97\u5b50\u5e95\u5c42\u5b9e\u73b0

\u4f8b\u5982\uff1a\u6dfb\u52a0\u5982\u4e0b\u81ea\u5b9a\u4e49\u5c42\u5e76\u66ff\u6362mindyolo\u4e2d\u6240\u6709\u7684nn.SiLU

class EdgeSiLU(nn.Cell):\n\"\"\"\n    SiLU activation function: x * sigmoid(x). To support for onnx export with nn.SiLU.\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n\n    def construct(self, x):\n        return x * ops.sigmoid(x)\n
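One possible way to swap every nn.SiLU in a network for the EdgeSiLU cell above before export is to walk the cell tree recursively. This is a hedged sketch based on mindspore.nn.Cell.name_cells(), not a snippet from the MindYOLO repository:

# Sketch: recursively replace nn.SiLU cells with EdgeSiLU before ONNX export (assumes EdgeSiLU is defined as above).
import mindspore.nn as nn

def replace_silu(cell: nn.Cell):
    for name, subcell in cell.name_cells().items():
        if isinstance(subcell, nn.SiLU):
            setattr(cell, name, EdgeSiLU())   # Cell.__setattr__ re-registers the child cell
        else:
            replace_silu(subcell)

# usage: replace_silu(network) before exporting with file_format ONNX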

"},{"location":"zh/tutorials/deployment/#_13","title":"\u6a21\u578b\u8f6c\u6362","text":"

ckpt\u6a21\u578b\u8f6c\u4e3aONNX\u6a21\u578b\uff0c\u6b64\u6b65\u9aa4\u4ee5\u53caTest\u6b65\u9aa4\u5747\u4ec5\u652f\u6301CPU\u4e0a\u8fd0\u884c

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format ONNX --device_target [CPU]\ne.g.\n# \u5728CPU\u4e0a\u8fd0\u884c\npython ./deploy/export.py --config ./configs/yolov3/yolov3.yaml --weight yolov3-darknet53_300e_mAP455-adfb27af.ckpt --per_batch_size 1 --file_format ONNX --device_target CPU\n

"},{"location":"zh/tutorials/deployment/#onnx-test","title":"ONNX Test","text":"

\u656c\u8bf7\u671f\u5f85

"},{"location":"zh/tutorials/deployment/#onnxruntime-predict","title":"ONNXRuntime Predict","text":"

\u5bf9\u5355\u5f20\u56fe\u7247\u63a8\u7406\uff1a

python ./deploy/predict.py --model_type ONNX --model_path ./path_to_onnx_model/model.onnx --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg\ne.g.\npython ./deploy/predict.py --model_type ONNX --model_path ./yolov3.onnx --config ./configs/yolov3/yolov3.yaml --image_path ./coco/image/val2017/image.jpg\n

"},{"location":"zh/tutorials/deployment/#_14","title":"\u6807\u51c6\u548c\u652f\u6301\u7684\u6a21\u578b\u5e93","text":"
  • YOLOv8
  • YOLOv7
  • YOLOX
  • YOLOv5
  • YOLOv4
  • YOLOv3
Name Scale Context ImageSize Dataset Box mAP (%) Params FLOPs Recipe Download YOLOv8 N D310x1-G 640 MS COCO 2017 37.2 3.2M 8.7G yaml ckpt mindir YOLOv8 S D310x1-G 640 MS COCO 2017 44.6 11.2M 28.6G yaml ckpt mindir YOLOv8 M D310x1-G 640 MS COCO 2017 50.5 25.9M 78.9G yaml ckpt mindir YOLOv8 L D310x1-G 640 MS COCO 2017 52.8 43.7M 165.2G yaml ckpt mindir YOLOv8 X D310x1-G 640 MS COCO 2017 53.7 68.2M 257.8G yaml ckpt mindir YOLOv7 Tiny D310x1-G 640 MS COCO 2017 37.5 6.2M 13.8G yaml ckpt mindir YOLOv7 L D310x1-G 640 MS COCO 2017 50.8 36.9M 104.7G yaml ckpt mindir YOLOv7 X D310x1-G 640 MS COCO 2017 52.4 71.3M 189.9G yaml ckpt mindir YOLOv5 N D310x1-G 640 MS COCO 2017 27.3 1.9M 4.5G yaml ckpt mindir YOLOv5 S D310x1-G 640 MS COCO 2017 37.6 7.2M 16.5G yaml ckpt mindir YOLOv5 M D310x1-G 640 MS COCO 2017 44.9 21.2M 49.0G yaml ckpt mindir YOLOv5 L D310x1-G 640 MS COCO 2017 48.5 46.5M 109.1G yaml ckpt mindir YOLOv5 X D310x1-G 640 MS COCO 2017 50.5 86.7M 205.7G yaml ckpt mindir YOLOv4 CSPDarknet53 D310x1-G 608 MS COCO 2017 45.4 27.6M 52G yaml ckpt mindir YOLOv4 CSPDarknet53(silu) D310x1-G 640 MS COCO 2017 45.8 27.6M 52G yaml ckpt mindir YOLOv3 Darknet53 D310x1-G 640 MS COCO 2017 45.5 61.9M 156.4G yaml ckpt mindir YOLOX N D310x1-G 416 MS COCO 2017 24.1 0.9M 1.1G yaml ckpt mindir YOLOX Tiny D310x1-G 416 MS COCO 2017 33.3 5.1M 6.5G yaml ckpt mindir YOLOX S D310x1-G 640 MS COCO 2017 40.7 9.0M 26.8G yaml ckpt mindir YOLOX M D310x1-G 640 MS COCO 2017 46.7 25.3M 73.8G yaml ckpt mindir YOLOX L D310x1-G 640 MS COCO 2017 49.2 54.2M 155.6G yaml ckpt mindir YOLOX X D310x1-G 640 MS COCO 2017 51.6 99.1M 281.9G yaml ckpt mindir YOLOX Darknet53 D310x1-G 640 MS COCO 2017 47.7 63.7M 185.3G yaml ckpt mindir"},{"location":"zh/tutorials/finetune/","title":"\u5fae\u8c03","text":""},{"location":"zh/tutorials/finetune/#finetune","title":"\u81ea\u5b9a\u4e49\u6570\u636e\u96c6finetune\u6d41\u7a0b","text":"

\u672c\u6587\u4ee5\u5b89\u5168\u5e3d\u4f69\u6234\u68c0\u6d4b\u6570\u636e\u96c6(SHWD)\u4e3a\u4f8b\uff0c\u4ecb\u7ecd\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u5728MindYOLO\u4e0a\u8fdb\u884cfinetune\u7684\u4e3b\u8981\u6d41\u7a0b\u3002

"},{"location":"zh/tutorials/finetune/#_2","title":"\u6570\u636e\u96c6\u683c\u5f0f\u8f6c\u6362","text":"

SHWD\u6570\u636e\u96c6\u91c7\u7528voc\u683c\u5f0f\u7684\u6570\u636e\u6807\u6ce8\uff0c\u5176\u6587\u4ef6\u76ee\u5f55\u5982\u4e0b\u6240\u793a\uff1a

             ROOT_DIR\n                \u251c\u2500\u2500 Annotations\n                \u2502        \u251c\u2500\u2500 000000.xml\n                \u2502        \u2514\u2500\u2500 000002.xml\n                \u251c\u2500\u2500 ImageSets\n                \u2502       \u2514\u2500\u2500 Main\n                \u2502             \u251c\u2500\u2500 test.txt\n                \u2502             \u251c\u2500\u2500 train.txt\n                \u2502             \u251c\u2500\u2500 trainval.txt\n                \u2502             \u2514\u2500\u2500 val.txt\n                \u2514\u2500\u2500 JPEGImages\n                        \u251c\u2500\u2500 000000.jpg\n                        \u2514\u2500\u2500 000002.jpg\n
Annotations\u6587\u4ef6\u5939\u4e0b\u7684xml\u6587\u4ef6\u4e3a\u6bcf\u5f20\u56fe\u7247\u7684\u6807\u6ce8\u4fe1\u606f\uff0c\u4e3b\u8981\u5185\u5bb9\u5982\u4e0b\uff1a
<annotation>\n  <folder>JPEGImages</folder>\n  <filename>000377.jpg</filename>\n  <path>F:\\baidu\\VOC2028\\JPEGImages\\000377.jpg</path>\n  <source>\n    <database>Unknown</database>\n  </source>\n  <size>\n    <width>750</width>\n    <height>558</height>\n    <depth>3</depth>\n  </size>\n  <segmented>0</segmented>\n  <object>\n    <name>hat</name>\n    <pose>Unspecified</pose>\n    <truncated>0</truncated>\n    <difficult>0</difficult>\n    <bndbox>\n      <xmin>142</xmin>\n      <ymin>388</ymin>\n      <xmax>177</xmax>\n      <ymax>426</ymax>\n    </bndbox>\n  </object>\n
\u5176\u4e2d\u5305\u542b\u591a\u4e2aobject, object\u4e2d\u7684name\u4e3a\u7c7b\u522b\u540d\u79f0\uff0cxmin, ymin, xmax, ymax\u5219\u4e3a\u68c0\u6d4b\u6846\u5de6\u4e0a\u89d2\u548c\u53f3\u4e0b\u89d2\u7684\u5750\u6807\u3002

MindYOLO\u652f\u6301\u7684\u6570\u636e\u96c6\u683c\u5f0f\u4e3aYOLO\u683c\u5f0f\uff0c\u8be6\u60c5\u53ef\u53c2\u8003\u6570\u636e\u51c6\u5907

\u7531\u4e8eMindYOLO\u5728\u9a8c\u8bc1\u9636\u6bb5\u9009\u7528\u56fe\u7247\u540d\u79f0\u4f5c\u4e3aimage_id\uff0c\u56e0\u6b64\u56fe\u7247\u540d\u79f0\u53ea\u80fd\u4e3a\u6570\u503c\u7c7b\u578b\uff0c\u800c\u4e0d\u80fd\u4e3a\u5b57\u7b26\u4e32\u7c7b\u578b\uff0c\u8fd8\u9700\u8981\u5bf9\u56fe\u7247\u8fdb\u884c\u6539\u540d\u3002\u5bf9SHWD\u6570\u636e\u96c6\u683c\u5f0f\u7684\u8f6c\u6362\u5305\u542b\u5982\u4e0b\u6b65\u9aa4\uff1a * \u5c06\u56fe\u7247\u590d\u5236\u5230\u76f8\u5e94\u7684\u8def\u5f84\u4e0b\u5e76\u6539\u540d * \u5728\u6839\u76ee\u5f55\u4e0b\u76f8\u5e94\u7684txt\u6587\u4ef6\u4e2d\u5199\u5165\u8be5\u56fe\u7247\u7684\u76f8\u5bf9\u8def\u5f84 * \u89e3\u6790xml\u6587\u4ef6\uff0c\u5728\u76f8\u5e94\u8def\u5f84\u4e0b\u751f\u6210\u5bf9\u5e94\u7684txt\u6807\u6ce8\u6587\u4ef6 * \u9a8c\u8bc1\u96c6\u8fd8\u9700\u751f\u6210\u6700\u7ec8\u7684json\u6587\u4ef6

\u8be6\u7ec6\u5b9e\u73b0\u53ef\u53c2\u8003convert_shwd2yolo.py\uff0c\u8fd0\u884c\u65b9\u5f0f\u5982\u4e0b\uff1a

python examples/finetune_SHWD/convert_shwd2yolo.py --root_dir /path_to_shwd/SHWD\n
\u8fd0\u884c\u4ee5\u4e0a\u547d\u4ee4\u5c06\u5728\u4e0d\u6539\u53d8\u539f\u6570\u636e\u96c6\u7684\u524d\u63d0\u4e0b\uff0c\u5728\u540c\u7ea7\u76ee\u5f55\u751f\u6210yolo\u683c\u5f0f\u7684SHWD\u6570\u636e\u96c6\u3002

"},{"location":"zh/tutorials/finetune/#yaml","title":"\u7f16\u5199yaml\u914d\u7f6e\u6587\u4ef6","text":"

\u914d\u7f6e\u6587\u4ef6\u4e3b\u8981\u5305\u542b\u6570\u636e\u96c6\u3001\u6570\u636e\u589e\u5f3a\u3001loss\u3001optimizer\u3001\u6a21\u578b\u7ed3\u6784\u6d89\u53ca\u7684\u76f8\u5e94\u53c2\u6570\uff0c\u7531\u4e8eMindYOLO\u63d0\u4f9byaml\u6587\u4ef6\u7ee7\u627f\u673a\u5236\uff0c\u53ef\u53ea\u5c06\u9700\u8981\u8c03\u6574\u7684\u53c2\u6570\u7f16\u5199\u4e3ayolov7-tiny_shwd.yaml\uff0c\u5e76\u7ee7\u627fMindYOLO\u63d0\u4f9b\u7684\u539f\u751fyaml\u6587\u4ef6\u5373\u53ef\uff0c\u5176\u5185\u5bb9\u5982\u4e0b\uff1a

__BASE__: [\n  '../../configs/yolov7/yolov7-tiny.yaml',\n]\n\nper_batch_size: 16 # \u5355\u5361batchsize\uff0c\u603b\u7684batchsize=per_batch_size * device_num\nimg_size: 640 # image sizes\nweight: ./yolov7-tiny_pretrain.ckpt\nstrict_load: False # \u662f\u5426\u6309\u4e25\u683c\u52a0\u8f7dckpt\u5185\u53c2\u6570\uff0c\u9ed8\u8ba4True\uff0c\u82e5\u8bbe\u6210False\uff0c\u5f53\u5206\u7c7b\u6570\u4e0d\u4e00\u81f4\uff0c\u4e22\u6389\u6700\u540e\u4e00\u5c42\u5206\u7c7b\u5668\u7684weight\nlog_interval: 10 # \u6bcflog_interval\u6b21\u8fed\u4ee3\u6253\u5370\u4e00\u6b21loss\u7ed3\u679c\n\ndata:\n  dataset_name: shwd\n  train_set: ./SHWD/train.txt # \u5b9e\u9645\u8bad\u7ec3\u6570\u636e\u8def\u5f84\n  val_set: ./SHWD/val.txt\n  test_set: ./SHWD/val.txt\n  nc: 2 # \u5206\u7c7b\u6570\n  # class names\n  names: [ 'person',  'hat' ] # \u6bcf\u4e00\u7c7b\u7684\u540d\u5b57\n\noptimizer:\n  lr_init: 0.001  # initial learning rate\n
* __BASE__\u4e3a\u4e00\u4e2a\u5217\u8868\uff0c\u8868\u793a\u7ee7\u627f\u7684yaml\u6587\u4ef6\u6240\u5728\u8def\u5f84\uff0c\u53ef\u4ee5\u7ee7\u627f\u591a\u4e2ayaml\u6587\u4ef6 * per_batch_size\u548cimg_size\u5206\u522b\u8868\u793a\u5355\u5361\u4e0a\u7684batch_size\u548c\u6570\u636e\u5904\u7406\u56fe\u7247\u91c7\u7528\u7684\u56fe\u7247\u5c3a\u5bf8 * weight\u4e3a\u4e0a\u8ff0\u63d0\u5230\u7684\u9884\u8bad\u7ec3\u6a21\u578b\u7684\u6587\u4ef6\u8def\u5f84\uff0cstrict_load\u8868\u793a\u4e22\u5f03shape\u4e0d\u4e00\u81f4\u7684\u53c2\u6570 * log_interval\u8868\u793a\u65e5\u5fd7\u6253\u5370\u95f4\u9694 * data\u5b57\u6bb5\u4e0b\u5168\u90e8\u4e3a\u6570\u636e\u96c6\u76f8\u5173\u53c2\u6570\uff0c\u5176\u4e2ddataset_name\u4e3a\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u540d\u79f0\uff0ctrain_set\u3001val_set\u3001test_set\u5206\u522b\u4e3a\u4fdd\u5b58\u8bad\u7ec3\u96c6\u3001\u9a8c\u8bc1\u96c6\u3001\u6d4b\u8bd5\u96c6\u56fe\u7247\u8def\u5f84\u7684txt\u6587\u4ef6\u8def\u5f84\uff0cnc\u4e3a\u7c7b\u522b\u6570\u91cf\uff0cnames\u4e3a\u7c7b\u522b\u540d\u79f0 * optimizer\u5b57\u6bb5\u4e0b\u7684lr_init\u4e3a\u7ecf\u8fc7warm_up\u4e4b\u540e\u7684\u521d\u59cb\u5316\u5b66\u4e60\u7387\uff0c\u6b64\u5904\u76f8\u6bd4\u9ed8\u8ba4\u53c2\u6570\u7f29\u5c0f\u4e8610\u500d

\u53c2\u6570\u7ee7\u627f\u5173\u7cfb\u548c\u53c2\u6570\u8bf4\u660e\u53ef\u53c2\u8003configuration\u3002

"},{"location":"zh/tutorials/finetune/#_3","title":"\u4e0b\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b","text":"

\u53ef\u9009\u7528MindYOLO\u63d0\u4f9b\u7684\u6a21\u578b\u4ed3\u5e93\u4f5c\u4e3a\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u7684\u9884\u8bad\u7ec3\u6a21\u578b\uff0c\u9884\u8bad\u7ec3\u6a21\u578b\u5728COCO\u6570\u636e\u96c6\u4e0a\u5df2\u7ecf\u6709\u8f83\u597d\u7684\u7cbe\u5ea6\u8868\u73b0\uff0c\u76f8\u6bd4\u4ece\u5934\u8bad\u7ec3\uff0c\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u4e00\u822c\u4f1a\u62e5\u6709\u66f4\u5feb\u7684\u6536\u655b\u901f\u5ea6\u4ee5\u53ca\u66f4\u9ad8\u7684\u6700\u7ec8\u7cbe\u5ea6\uff0c\u5e76\u4e14\u5927\u6982\u7387\u80fd\u907f\u514d\u521d\u59cb\u5316\u4e0d\u5f53\u5bfc\u81f4\u7684\u68af\u5ea6\u6d88\u5931\u3001\u68af\u5ea6\u7206\u70b8\u7b49\u95ee\u9898\u3002

\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u7c7b\u522b\u6570\u901a\u5e38\u4e0eCOCO\u6570\u636e\u96c6\u4e0d\u4e00\u81f4\uff0cMindYOLO\u4e2d\u5404\u6a21\u578b\u7684\u68c0\u6d4b\u5934head\u7ed3\u6784\u8ddf\u6570\u636e\u96c6\u7c7b\u522b\u6570\u6709\u5173\uff0c\u76f4\u63a5\u5c06\u9884\u8bad\u7ec3\u6a21\u578b\u5bfc\u5165\u53ef\u80fd\u4f1a\u56e0\u4e3ashape\u4e0d\u4e00\u81f4\u800c\u5bfc\u5165\u5931\u8d25\uff0c\u53ef\u4ee5\u5728yaml\u914d\u7f6e\u6587\u4ef6\u4e2d\u8bbe\u7f6estrict_load\u53c2\u6570\u4e3aFalse\uff0cMindYOLO\u5c06\u81ea\u52a8\u820d\u5f03shape\u4e0d\u4e00\u81f4\u7684\u53c2\u6570\uff0c\u5e76\u629b\u51fa\u8be5module\u53c2\u6570\u5e76\u672a\u5bfc\u5165\u7684\u544a\u8b66

"},{"location":"zh/tutorials/finetune/#finetune_1","title":"\u6a21\u578b\u5fae\u8c03(Finetune)","text":"

\u6a21\u578b\u5fae\u8c03\u8fc7\u7a0b\u4e2d\uff0c\u53ef\u9996\u5148\u6309\u7167\u9ed8\u8ba4\u914d\u7f6e\u8fdb\u884c\u8bad\u7ec3\uff0c\u5982\u6548\u679c\u4e0d\u4f73\uff0c\u53ef\u8003\u8651\u8c03\u6574\u4ee5\u4e0b\u53c2\u6570\uff1a * \u5b66\u4e60\u7387\u53ef\u8c03\u5c0f\u4e00\u4e9b\uff0c\u9632\u6b62loss\u96be\u4ee5\u6536\u655b * per_batch_size\u53ef\u6839\u636e\u5b9e\u9645\u663e\u5b58\u5360\u7528\u8c03\u6574\uff0c\u901a\u5e38per_batch_size\u8d8a\u5927\uff0c\u68af\u5ea6\u8ba1\u7b97\u8d8a\u7cbe\u786e * epochs\u53ef\u6839\u636eloss\u662f\u5426\u6536\u655b\u8fdb\u884c\u8c03\u6574 * anchor\u53ef\u6839\u636e\u5b9e\u9645\u7269\u4f53\u5927\u5c0f\u8fdb\u884c\u8c03\u6574

\u7531\u4e8eSHWD\u8bad\u7ec3\u96c6\u53ea\u6709\u7ea66000\u5f20\u56fe\u7247\uff0c\u9009\u7528yolov7-tiny\u6a21\u578b\u8fdb\u884c\u8bad\u7ec3\u3002 * \u5728\u591a\u5361NPU/GPU\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u6a21\u578b\u8bad\u7ec3\uff0c\u4ee58\u5361\u4e3a\u4f8b:

mpirun --allow-run-as-root -n 8 python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --is_parallel True\n
  • \u5728\u5355\u5361NPU/GPU/CPU\u4e0a\u8bad\u7ec3\u6a21\u578b\uff1a

python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml 
\u6ce8\u610f\uff1a\u76f4\u63a5\u7528yolov7-tiny\u9ed8\u8ba4\u53c2\u6570\u5728SHWD\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\uff0c\u53ef\u53d6\u5f97AP50 87.0\u7684\u7cbe\u5ea6\u3002\u5c06lr_init\u53c2\u6570\u75310.01\u6539\u4e3a0.001\uff0c\u5373\u53ef\u5b9e\u73b0ap50\u4e3a89.2\u7684\u7cbe\u5ea6\u7ed3\u679c\u3002

"},{"location":"zh/tutorials/finetune/#_4","title":"\u53ef\u89c6\u5316\u63a8\u7406","text":"

\u4f7f\u7528/demo/predict.py\u5373\u53ef\u7528\u8bad\u7ec3\u597d\u7684\u6a21\u578b\u8fdb\u884c\u53ef\u89c6\u5316\u63a8\u7406\uff0c\u8fd0\u884c\u65b9\u5f0f\u5982\u4e0b\uff1a

python demo/predict.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg\n
\u63a8\u7406\u6548\u679c\u5982\u4e0b\uff1a

"},{"location":"zh/tutorials/modelarts/","title":"MindYOLO ModelArts\u8bad\u7ec3\u5feb\u901f\u5165\u95e8","text":"

\u672c\u6587\u4e3b\u8981\u4ecb\u7ecdMindYOLO\u501f\u52a9ModelArts\u5e73\u53f0\u7684\u8bad\u7ec3\u65b9\u6cd5\u3002 ModelArts\u76f8\u5173\u6559\u7a0b\u53c2\u8003\u5e2e\u52a9\u4e2d\u5fc3

"},{"location":"zh/tutorials/modelarts/#_1","title":"\u51c6\u5907\u6570\u636e\u53ca\u4ee3\u7801","text":"

\u4f7f\u7528OBS\u670d\u52a1\u4e0a\u4f20\u6570\u636e\u96c6\uff0c\u76f8\u5173\u64cd\u4f5c\u6559\u7a0b\u89c1OBS\u7528\u6237\u6307\u5357\uff0c\u83b7\u53d6\u672c\u8d26\u6237\u7684AK\uff0c\u670d\u52a1\u5668\u5730\u5740\u8bf7\u54a8\u8be2\u5bf9\u5e94\u5e73\u53f0\u7ba1\u7406\u5458\u6216\u8d26\u53f7\u8d1f\u8d23\u4eba\uff0c\u5982AK\u4e0d\u5728\u7528\u6237\u6307\u5357\u6307\u5b9a\u4f4d\u7f6e\uff0c\u4e5f\u8bf7\u54a8\u8be2\u5e73\u53f0\u7ba1\u7406\u5458\u6216\u8d26\u53f7\u8d1f\u8d23\u4eba\u3002 \u64cd\u4f5c\uff1a 1. \u767b\u5f55obs browser+ 2. \u521b\u5efa\u6876 -> \u65b0\u5efa\u6587\u4ef6\u5939\uff08\u5982\uff1acoco\uff09 3. \u4e0a\u4f20\u6570\u636e\u6587\u4ef6\uff0c\u8bf7\u5c06\u6570\u636e\u6587\u4ef6\u7edf\u4e00\u5355\u72ec\u653e\u7f6e\u5728\u4e00\u4e2a\u6587\u4ef6\u5939\u5185\uff08\u5373\u7528\u4f8b\u4e2d\u7684coco\uff09\uff0c\u4ee3\u7801\u4e2d\u4f1a\u5bf9obs\u6876\u5185\u6570\u636e\u505a\u62f7\u8d1d\uff0c\u62f7\u8d1d\u5185\u5bb9\u4e3a\u6b64\u6587\u4ef6\u5939\uff08\u5982\uff1acoco\uff09\u4e0b\u6240\u6709\u7684\u6587\u4ef6\u3002\u5982\u672a\u65b0\u5efa\u6587\u4ef6\u5939\uff0c\u5c31\u65e0\u6cd5\u9009\u62e9\u5b8c\u6574\u6570\u636e\u96c6\u3002

"},{"location":"zh/tutorials/modelarts/#_2","title":"\u51c6\u5907\u4ee3\u7801","text":"

\u540c\u6837\u4f7f\u7528OBS\u670d\u52a1\u4e0a\u4f20\u8bad\u7ec3\u4ee3\u7801\u3002 \u64cd\u4f5c\uff1a\u521b\u5efa\u6876 -> \u65b0\u5efa\u6587\u4ef6\u5939\uff08\u5982\uff1amindyolo\uff09-> \u4e0a\u4f20\u4ee3\u7801\u6587\u4ef6\uff0c\u5728mindyolo\u540c\u5c42\u7ea7\u4e0b\u521b\u5efaoutput\u6587\u4ef6\u5939\u7528\u4e8e\u5b58\u653e\u8bad\u7ec3\u8bb0\u5f55\uff0c\u521b\u5efalog\u6587\u4ef6\u5939\u7528\u4e8e\u5b58\u653e\u65e5\u5fd7\u3002

"},{"location":"zh/tutorials/modelarts/#_3","title":"\u65b0\u5efa\u7b97\u6cd5","text":"
  1. \u5728\u9009\u9879\u5361\u4e2d\u9009\u62e9\u7b97\u6cd5\u7ba1\u7406->\u521b\u5efa\u3002
  2. \u81ea\u5b9a\u4e49\u7b97\u6cd5\u540d\u79f0\uff0c\u9884\u5236\u6846\u67b6\u9009\u62e9Ascend-Powered-Engine\uff0cmaster\u5206\u652f\u8bf7\u9009\u62e9MindSpore-2.0\u7248\u672c\u955c\u50cf\uff0cr0.1\u5206\u652f\u8bf7\u9009\u62e9MindSpore-1.8.1\u7248\u672c\u955c\u50cf\uff0c\u8bbe\u7f6e\u4ee3\u7801\u76ee\u5f55\u3001\u542f\u52a8\u6587\u4ef6\u3001\u8f93\u5165\u3001\u8f93\u51fa\u4ee5\u53ca\u8d85\u53c2\u3002
  • \u5982\u9700\u52a0\u8f7d\u9884\u8bad\u7ec3\u6743\u91cd\uff0c\u53ef\u5728\u9009\u62e9\u6a21\u578b\u4e2d\u9009\u62e9\u5df2\u4e0a\u4f20\u7684\u6a21\u578b\u6587\u4ef6\uff0c\u5e76\u5728\u8fd0\u884c\u53c2\u6570\u4e2d\u589e\u52a0ckpt_dir\u53c2\u6570
  • \u542f\u52a8\u6587\u4ef6\u4e3atrain.py
  • \u8fd0\u884c\u8d85\u53c2\u9700\u6dfb\u52a0enable_modelarts\uff0c\u503c\u4e3aTrue
  • \u8fd0\u884c\u8d85\u53c2config\u8def\u5f84\u53c2\u8003\u8bad\u7ec3\u4f5c\u4e1a\u4e2d\u8fd0\u884c\u73af\u5883\u9884\u89c8\u7684\u76ee\u5f55\uff0c\u5982/home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml
  • \u5982\u6d89\u53ca\u5206\u5e03\u5f0f\u8bad\u7ec3\u573a\u666f\uff0c\u9700\u589e\u52a0\u8d85\u53c2is_parallel\uff0c\u5e76\u5728\u5206\u5e03\u5f0f\u8fd0\u884c\u65f6\u8bbe\u7f6e\u4e3aTrue\uff0c\u5355\u5361\u65f6\u4e3aFalse
"},{"location":"zh/tutorials/modelarts/#_4","title":"\u65b0\u5efa\u4f5c\u4e1a","text":"
  1. \u5728ModelArts\u670d\u52a1\u4e2d\u9009\u62e9\uff1a\u8bad\u7ec3\u7ba1\u7406 -> \u8bad\u7ec3\u4f5c\u4e1a -> \u521b\u5efa\u8bad\u7ec3\u4f5c\u4e1a\uff0c\u8bbe\u7f6e\u4f5c\u4e1a\u540d\u79f0\uff0c\u9009\u62e9\u4e0d\u7eb3\u5165\u5b9e\u9a8c\uff1b\u521b\u5efa\u65b9\u5f0f->\u6211\u7684\u7b97\u6cd5\u9009\u62e9\u521a\u624d\u65b0\u5efa\u7684\u7b97\u6cd5\uff1b
  2. \u8bad\u7ec3\u8f93\u5165->\u6570\u636e\u5b58\u50a8\u4f4d\u7f6e\uff0c\u9009\u62e9\u521a\u624d\u521b\u5efa\u7684obs\u6570\u636e\u6876\uff08\u793a\u4f8b\u4e2d\u4e3acoco\uff09\uff0c\u8bad\u7ec3\u8f93\u51fa\u9009\u62e9\u51c6\u5907\u4ee3\u7801\u65f6\u7684output\u6587\u4ef6\u5939\uff0c\u5e76\u6839\u636e\u8fd0\u884c\u73af\u5883\u9884\u89c8\u8bbe\u7f6e\u597dconfig\u8d85\u53c2\u503c;
  3. \u9009\u62e9\u8d44\u6e90\u6c60\u3001\u89c4\u683c\u3001\u8ba1\u7b97\u8282\u70b9\uff0c\u4f5c\u4e1a\u65e5\u5fd7\u8def\u5f84\u9009\u62e9\u521b\u5efa\u4ee3\u7801\u65f6\u7684log\u6587\u4ef6\u5939
  4. \u63d0\u4ea4\u8bad\u7ec3\uff0c\u6392\u961f\u540e\u4f1a\u8fdb\u5165\u8fd0\u884c\u4e2d
"},{"location":"zh/tutorials/modelarts/#_5","title":"\u4fee\u6539\u4f5c\u4e1a","text":"

\u5728\u8bad\u7ec3\u4f5c\u4e1a\u9875\u9762\u9009\u62e9\u91cd\u5efa\uff0c\u53ef\u4fee\u6539\u9009\u62e9\u7684\u4f5c\u4e1a\u914d\u7f6e

"},{"location":"zh/tutorials/quick_start/","title":"\u5feb\u901f\u5f00\u59cb","text":""},{"location":"zh/tutorials/quick_start/#mindyolo","title":"MindYOLO \u5feb\u901f\u5165\u95e8","text":"

\u672c\u6587\u7b80\u8981\u4ecb\u7ecdMindYOLO\u4e2d\u5185\u7f6e\u7684\u547d\u4ee4\u884c\u5de5\u5177\u7684\u4f7f\u7528\u65b9\u6cd5\u3002

"},{"location":"zh/tutorials/quick_start/#_2","title":"\u4f7f\u7528\u9884\u8bad\u7ec3\u6a21\u578b\u8fdb\u884c\u63a8\u7406","text":"
  1. \u4ece\u6a21\u578b\u4ed3\u5e93\u4e2d\u9009\u62e9\u4e00\u4e2a\u6a21\u578b\u53ca\u5176\u914d\u7f6e\u6587\u4ef6\uff0c\u4f8b\u5982\uff0c ./configs/yolov7/yolov7.yaml.
  2. \u4ece\u6a21\u578b\u4ed3\u5e93\u4e2d\u4e0b\u8f7d\u76f8\u5e94\u7684\u9884\u8bad\u7ec3\u6a21\u578b\u6743\u91cd\u6587\u4ef6\u3002
  3. \u4f7f\u7528\u5185\u7f6e\u914d\u7f6e\u8fdb\u884c\u63a8\u7406\uff0c\u8bf7\u8fd0\u884c\u4ee5\u4e0b\u547d\u4ee4\uff1a
# NPU (\u9ed8\u8ba4)\npython demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg\n\n# GPU\npython demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=GPU\n

\u6709\u5173\u547d\u4ee4\u884c\u53c2\u6570\u7684\u8be6\u7ec6\u4fe1\u606f\uff0c\u8bf7\u53c2\u9605demo/predict.py -h\uff0c\u6216\u67e5\u770b\u5176\u6e90\u4ee3\u7801\u3002

  • \u8981\u5728CPU\u4e0a\u8fd0\u884c\uff0c\u8bf7\u5c06device_target\u7684\u503c\u4fee\u6539\u4e3aCPU.
  • \u7ed3\u679c\u5c06\u4fdd\u5b58\u5728./detect_results\u76ee\u5f55\u4e0b
"},{"location":"zh/tutorials/quick_start/#_3","title":"\u4f7f\u7528\u547d\u4ee4\u884c\u8fdb\u884c\u8bad\u7ec3\u548c\u8bc4\u4f30","text":"
  • \u6309\u7167YOLO\u683c\u5f0f\u51c6\u5907\u60a8\u7684\u6570\u636e\u96c6\u3002\u5982\u679c\u4f7f\u7528COCO\u6570\u636e\u96c6\uff08YOLO\u683c\u5f0f\uff09\u8fdb\u884c\u8bad\u7ec3\uff0c\u8bf7\u4eceyolov5\u6216darknet\u51c6\u5907\u6570\u636e\u96c6.
  coco/\n    {train,val}2017.txt\n    annotations/\n      instances_{train,val}2017.json\n    images/\n      {train,val}2017/\n          00000001.jpg\n          ...\n          # image files that are mentioned in the corresponding train/val2017.txt\n    labels/\n      {train,val}2017/\n          00000001.txt\n          ...\n          # label files that are mentioned in the corresponding train/val2017.txt\n
  • \u5728\u591a\u5361NPU/GPU\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u6a21\u578b\u8bad\u7ec3\uff0c\u4ee58\u5361\u4e3a\u4f8b:
mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True\n
  • \u5728\u5355\u5361NPU/GPU/CPU\u4e0a\u8bad\u7ec3\u6a21\u578b\uff1a
python train.py --config ./configs/yolov7/yolov7.yaml 
  • \u5728\u5355\u5361NPU/GPU/CPU\u4e0a\u8bc4\u4f30\u6a21\u578b\u7684\u7cbe\u5ea6\uff1a

python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt\n
* \u5728\u591a\u5361NPU/GPU\u4e0a\u8fdb\u884c\u5206\u5e03\u5f0f\u8bc4\u4f30\u6a21\u578b\u7684\u7cbe\u5ea6\uff1a

mpirun --allow-run-as-root -n 8 python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt --is_parallel True\n

\u6ce8\u610f\uff1a

(1) \u9ed8\u8ba4\u8d85\u53c2\u4e3a8\u5361\u8bad\u7ec3\uff0c\u5355\u5361\u60c5\u51b5\u9700\u8c03\u6574\u90e8\u5206\u53c2\u6570\u3002

(2) \u9ed8\u8ba4\u8bbe\u5907\u4e3aAscend\uff0c\u60a8\u53ef\u4ee5\u6307\u5b9a'device_target'\u7684\u503c\u4e3aAscend/GPU/CPU\u3002

(3) \u6709\u5173\u66f4\u591a\u9009\u9879\uff0c\u8bf7\u53c2\u9605 train/test.py -h\u3002

(4) \u5728CloudBrain\u4e0a\u8fdb\u884c\u8bad\u7ec3\uff0c\u8bf7\u5728\u8fd9\u91cc\u67e5\u770b

"},{"location":"zh/tutorials/quick_start/#_4","title":"\u90e8\u7f72","text":"

\u8bf7\u5728\u90e8\u7f72\u67e5\u770b.

"},{"location":"zh/tutorials/quick_start/#mindyolo-api","title":"\u5728\u4ee3\u7801\u4e2d\u4f7f\u7528MindYOLO API","text":"

\u656c\u8bf7\u671f\u5f85

"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 00000000..7484d576 --- /dev/null +++ b/sitemap.xml @@ -0,0 +1,195 @@ + + + + https://mindspore-lab.github.io/mindyolo/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/installation/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/how_to_guides/callback/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/how_to_guides/data_preparation/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/how_to_guides/write_a_new_model/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/benchmark/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolov3/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolov4/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolov5/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolov7/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolov8/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/modelzoo/yolox/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/notes/changelog/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/notes/code_of_conduct/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/notes/contributing/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/notes/faq/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/reference/data/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/reference/models/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/configuration/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/data_augmentation/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/deployment/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/finetune/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/modelarts/ + 2024-08-27 + daily + + + + + + https://mindspore-lab.github.io/mindyolo/tutorials/quick_start/ + 2024-08-27 + daily + + + + + \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz new file mode 100644 index 0000000000000000000000000000000000000000..9ce478b29e491d7a55ba4b298311b7973729f0e0 GIT binary patch literal 696 zcmV;p0!RHHiwFq(mCa@X|8r?{Wo=<_E_iKh0NvTmQtB`m2H?F{!E_Hn#o=cVcjpSu zS$9JF(Pl^!Oj3(p{Ye9(1u5W2l4;mzYr>m159LSZFWZ9l-atsg*>iM17)Cu{2~P>j zo}+(%|MVx(%i?Zc3MxVM^k62Rqg<)Y456y329hW!@J0@}$dF8OeIU{PBpME;(QeLc zo2!Bz%&Z>Gocrl?ip*c5#a&O|>a5*w<~FB1S=7^{o>vf-$_*FLr+7KYh|0_5fN*pE zol}nHs2<*&jioAhx_B5qjQYb#|6#0$?9VlZB{^mptOUF+QcURk95v1~#+ilAC}HcK z0Nn>u1%hD(Q7;#;qPX5RV7sGT-K*9O3`OY2K_B^#gFf=Zpxu_E&prRu?h_^zrW7mv zVCpi(K@Vpx?Ws3K%6Szl9%m&f#TO>@e!tqAH28+&Lc^8^_ zB0awji?o%{ucR@K8C0>x0ve9(_iCPSFD&Gyy5{~rI7dtUCMz$n==AgMi8js*FJ;?d ztB>9ugD~4yaSwJX*rL`b9IcTrt&uaWr*O2Md}%#7(;A1PHTI=7cBVB6M{D9sYvM|4 z8w}RglhxKD72~S@q;K|>@~q2N$5otRoej33r5_wIU&RS$X_=@1%#I4VmOB@3#g8dL zmZc&rgbS|O#<^Tuwgt8oe(k1TdOP>cTyxdNl2DF6Vb30ctq literal 0 HcmV?d00001 diff --git a/tutorials/configuration/index.html b/tutorials/configuration/index.html new file mode 100644 index 00000000..ab30b6f5 --- /dev/null +++ b/tutorials/configuration/index.html @@ -0,0 
+1,1730 @@ + + + + + + + + + + + + + + + + + + + + + + + + Configuration - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Configuration

+

MindYOLO supports parameter parsing from both yaml files and the command line. Parameters that are fixed, complex, closely tied to the model, or nested are placed in yaml files, while the simpler ones, or parameters that vary from run to run, can be passed in on the command line.

+

The following takes yolov3 as an example to explain how to configure the corresponding parameters.

+

Parameter Inheritance Relationship

+

As follows, the parameter priority is from high to low. When a parameter with the same name appears, the low-priority parameter will be overwritten by the high-priority parameter.

+
    +
  • Parameters inputted with user command lines
  • +
  • Default parameters set in the argument parser of the .py files
  • +
  • Parameters in yaml files specified by user command lines
  • +
  • Parameters in yaml files set by __BASE__ contained in yaml files specified by user command lines. Take yolov3 as an example, it contains: +
    __BASE__: [
    +  '../coco.yaml',
    +  './hyp.scratch.yaml',
    +]
    +
  • +
+

Basic Parameters

+

Parameter Description

+
    +
  • device_target: device used, Ascend/GPU/CPU
  • +
  • save_dir: the path to save the running results, the default is ./runs
  • +
  • log_interval: step interval to print logs, the default is 100
  • +
  • is_parallel: whether to perform distributed training, the default is False
  • +
  • ms_mode: whether to use static graph mode (0) or dynamic graph mode (1), the default is 0.
  • +
  • config: yaml configuration file path
  • +
  • per_batch_size: batch size of each card, default is 32
  • +
  • epochs: number of training epochs, default is 300
  • +
  • ...
  • +
+

Parse parameter settings

+

This part of the parameters is usually passed in from the command line. Examples are as follows:

+
mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True --log_interval 50
+
+
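To make the override order concrete, the following is a minimal sketch of how a yaml file with a __BASE__ list could be resolved and then overridden by command-line values. It is illustrative only, not the actual MindYOLO loader; the helper names load_config and merge are hypothetical.

import os
import yaml

def merge(dst, src):
    # Later (higher-priority) values overwrite earlier ones; nested dicts merge recursively.
    for k, v in src.items():
        if isinstance(v, dict) and isinstance(dst.get(k), dict):
            merge(dst[k], v)
        else:
            dst[k] = v
    return dst

def load_config(path):
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    merged = {}
    # Resolve inherited files first so that the current file takes precedence over __BASE__.
    for base in cfg.pop("__BASE__", []):
        merge(merged, load_config(os.path.join(os.path.dirname(path), base)))
    return merge(merged, cfg)

cfg = load_config("./configs/yolov7/yolov7.yaml")
merge(cfg, {"is_parallel": True, "log_interval": 50})  # command-line values applied last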

Dataset

+

Parameter Description

+
    +
  • dataset_name: dataset name
  • +
  • train_set: the path where the training set is located
  • +
  • val_set: the path where the verification set is located
  • +
  • test_set: the path where the test set is located
  • +
  • nc: number of categories in the data set
  • +
  • names: category names
  • ...
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/coco.yaml, and the data set path usually needs to be modified.

+
data:
+  dataset_name: coco
+
+  train_set: ./coco/train2017.txt  # 118287 images
+  val_set: ./coco/val2017.txt  # 5000 images
+  test_set: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+  nc: 80
+
+  # class names
+  names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+           'hair drier', 'toothbrush' ]
+
+
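When adapting this file to a custom dataset, a quick sanity check is that nc matches the number of entries in names and that the list files exist. A small illustrative snippet (it simply assumes the data layout shown above):

import os
import yaml

with open("./configs/coco.yaml") as f:
    data_cfg = yaml.safe_load(f)["data"]

assert data_cfg["nc"] == len(data_cfg["names"]), "nc must equal the number of class names"
for key in ("train_set", "val_set", "test_set"):
    if not os.path.exists(data_cfg[key]):
        print(f"warning: {key} not found: {data_cfg[key]}")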

Data Augmentation

+

Parameter Description

+
    +
  • num_parallel_workers: number of worker processes reading data
  • +
  • train_transforms: data augmentation pipeline applied during training
  • +
  • test_transforms: data augmentation pipeline applied during validation
  • ...
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml, where train_transforms and test_transforms are lists of dictionaries; each dictionary contains the name of a data augmentation operation, the probability of applying it, and the parameters of that method.

+
data:
+  num_parallel_workers: 4
+
+  train_transforms:
+    - { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.9 }
+    - { func_name: mixup, prob: 0.1, alpha: 8.0, beta: 8.0, needed_mosaic: True }
+    - { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: albumentations }
+    - { func_name: fliplr, prob: 0.5 }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+  test_transforms:
+    - { func_name: letterbox, scaleup: False }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+

Model

+

Parameter Description

+
    +
  • model_name: model name
  • +
  • depth_multiple: model depth factor
  • +
  • width_multiple: model width factor
  • +
  • stride: feature map downsampling multiple
  • +
  • anchors: default anchor box
  • +
  • backbone: model backbone network
  • +
  • head: model detection head
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/yolov3.yaml. The network is constructed from the backbone and head entries, which are nested lists: each row describes one layer module with 4 fields, namely the index of its input layer (-1 means the previous layer), the number of times the module is repeated, the module name, and the module's arguments. Users can also define and register networks directly in .py files without resorting to yaml files. +

network:
+  model_name: yolov3
+
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [8, 16, 32]
+  anchors:
+    - [10,13, 16,30, 33,23]  # P3/8
+    - [30,61, 62,45, 59,119]  # P4/16
+    - [116,90, 156,198, 373,326]  # P5/32
+
+  # darknet53 backbone
+  backbone:
+    # [from, number, module, args]
+    [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+     [-1, 1, ConvNormAct, [512, 3, 2]],  # 7-P4/16
+     [-1, 8, Bottleneck, [512]],
+     [-1, 1, ConvNormAct, [1024, 3, 2]],  # 9-P5/32
+     [-1, 4, Bottleneck, [1024]],  # 10
+    ]
+
+  # YOLOv3 head
+  head:
+    [[-1, 1, Bottleneck, [1024, False]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],  # 15 (P5/32-large)
+
+     [-2, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 8], 1, Concat, [1]],  # cat backbone P4
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, ConvNormAct, [512, 3, 1]],  # 22 (P4/16-medium)
+
+     [-2, 1, ConvNormAct, [128, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 6], 1, Concat, [1]],  # cat backbone P3
+     [-1, 1, Bottleneck, [256, False]],
+     [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
+
+     [[27, 22, 15], 1, YOLOv3Head, [nc, anchors, stride]],   # Detect(P3, P4, P5)
+    ]
+
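To illustrate how such a nested list is consumed, here is a minimal sketch (not MindYOLO's actual network builder) that walks the [from, number, module, args] rows and reports what would be instantiated; looking the module classes up by name is assumed to happen elsewhere.

# Sketch: walking the [from, number, module, args] rows of a backbone/head spec.
backbone = [
    [-1, 1, "ConvNormAct", [32, 3, 1]],
    [-1, 1, "ConvNormAct", [64, 3, 2]],
    [-1, 1, "Bottleneck", [64]],
]

for i, (frm, number, module, args) in enumerate(backbone):
    # frm is the index of the input layer (-1 means the previous layer), number is the repeat count.
    src = i - 1 if frm == -1 else frm
    print(f"layer {i}: {number} x {module}({', '.join(map(str, args))}) <- layer {src}")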

+

Loss function

+

Parameter Description

+
    +
  • name: loss function name
  • +
  • box: box loss weight
  • +
  • cls: class loss weight
  • +
  • cls_pw: class loss positive sample weight
  • +
  • obj: object loss weight
  • +
  • obj_pw: object loss positive sample weight
  • +
  • fl_gamma: focal loss gamma
  • +
  • anchor_t: anchor shape proportion threshold
  • +
  • label_smoothing: label smoothing value
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml

+
loss:
+  name: YOLOv7Loss
+  box: 0.05  # box loss gain
+  cls: 0.5  # cls loss gain
+  cls_pw: 1.0  # cls BCELoss positive_weight
+  obj: 1.0  # obj loss gain (scale with pixels)
+  obj_pw: 1.0  # obj BCELoss positive_weight
+  fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+  anchor_t: 4.0  # anchor-multiple threshold
+  label_smoothing: 0.0 # label smoothing epsilon
+
+
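The box, cls and obj values are simply gains applied to the three loss components. As a hedged illustration, with lbox, lcls and lobj standing in for already-computed component losses (the numbers below are made up):

lbox, lcls, lobj = 0.08, 0.42, 0.65            # placeholder component losses
box_gain, cls_gain, obj_gain = 0.05, 0.5, 1.0  # gains from the yaml above
total_loss = box_gain * lbox + cls_gain * lcls + obj_gain * lobj
print(total_loss)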

Optimizer

+

Parameter Description

+
    +
  • optimizer: optimizer name.
  • +
  • lr_init: initial value of learning rate
  • +
  • warmup_epochs: number of warmup epochs
  • +
  • warmup_momentum: initial value of warmup momentum
  • +
  • warmup_bias_lr: initial value of warmup bias learning rate
  • +
  • min_warmup_step: minimum number of warmup steps
  • +
  • group_param: parameter grouping strategy
  • +
  • gp_weight_decay: Group parameter weight decay coefficient
  • +
  • start_factor: initial learning rate factor
  • +
  • end_factor: end learning rate factor
  • +
  • momentum: momentum of the moving average
  • +
  • loss_scale: loss scaling coefficient
  • +
  • nesterov: Whether to use the Nesterov Accelerated Gradient (NAG) algorithm to update the gradient.
  • +
+

Yaml file sample

+

This part of the parameters is defined in configs/yolov3/hyp.scratch.yaml. In the following example, the initial learning rate after the warmup stage is lr_init * start_factor = 0.01 * 1.0 = 0.01, the final learning rate is lr_init * end_factor = 0.01 * 0.01 = 0.0001

+
optimizer:
+  optimizer: momentum
+  lr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+  momentum: 0.937  # SGD momentum/Adam beta1
+  nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm
+  loss_scale: 1.0 # loss scale for optimizer
+  warmup_epochs: 3  # warmup epochs (fractions ok)
+  warmup_momentum: 0.8  # warmup initial momentum
+  warmup_bias_lr: 0.1  # warmup initial bias lr
+  min_warmup_step: 1000 # minimum warmup step
+  group_param: yolov7 # group param strategy
+  gp_weight_decay: 0.0005  # group param weight decay 5e-4
+  start_factor: 1.0
+  end_factor: 0.01
+
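A small sketch of the resulting schedule, assuming a linear warmup followed by a linear decay between lr_init * start_factor and lr_init * end_factor; this mirrors the description above rather than the exact MindYOLO scheduler code.

def lr_at(step, total_steps, warmup_steps, lr_init=0.01, start_factor=1.0, end_factor=0.01):
    start_lr, end_lr = lr_init * start_factor, lr_init * end_factor
    if step < warmup_steps:
        return start_lr * step / max(warmup_steps, 1)      # linear warmup from 0
    t = (step - warmup_steps) / max(total_steps - warmup_steps, 1)
    return start_lr + (end_lr - start_lr) * t              # linear decay to end_lr

for s in (0, 500, 1000, 5000, 10000):
    print(s, round(lr_at(s, total_steps=10000, warmup_steps=1000), 5))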
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/data_augmentation/index.html b/tutorials/data_augmentation/index.html new file mode 100644 index 00000000..49f6cde7 --- /dev/null +++ b/tutorials/data_augmentation/index.html @@ -0,0 +1,1375 @@ + + + + + + + + + + + + + + + + + + + + + + + + Augmentation - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Data Augmentation

+

List of data enhancement methods that come with the package

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Data enhancement method name | Summary explanation
mosaic | randomly select mosaic4 or mosaic9
mosaic4 | 4-image splicing
mosaic9 | 9-image (nine-grid) splicing
mixup | linearly mix two images
pastein | clipping enhancement (paste samples into the image)
random_perspective | random perspective transformation
hsv_augment | random color (HSV) transformation
fliplr | horizontal flip
flipud | vertical flip
letterbox | scale and fill
label_norm | label normalization: coordinates normalized to the 0-1 range
label_pad | fill label information into a fixed-size array
image_norm | image data normalization
image_transpose | channel transpose (BGR to RGB) and dimension transpose (HWC to CHW)
albumentations | albumentations data enhancement
+

These data augmentation functions are defined in mindyolo/data/dataset.py.

+

Instructions

+

MindYOLO data augmentation is configured in the yaml file. For example, to add data augmentation during training, add a list of dictionaries under the data.train_transforms field of the yaml file; the augmentation methods are applied in order from top to bottom.

+

A typical data augmentation configuration dictionary must contain func_name, the name of the augmentation method to apply, followed by the parameters that need to be set for that method. If a parameter is not configured in the dictionary, the method's default value for that parameter is used.

+

Data enhancement common configuration dictionary: +

- {func_name: data enhancement method name 1, args11=x11, args12=x12, ..., args1n=x1n}
+- {func_name: data enhancement method name 2, args21=x21, args22=x22, ..., args2n=x2n}
+...
+- {func_name: data enhancement method name n, argsn1=xn1, argsn2=xn2, ..., argsnn=xnn}
+
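Conceptually, each dictionary is dispatched to a dataset method of the same name, with the remaining keys passed as keyword arguments and prob deciding whether the transform runs at all. A minimal sketch of that idea (not the exact COCODataset code):

# Sketch: dispatching transform dicts by func_name, honoring the per-transform prob.
import random

def apply_transforms(dataset, image, labels, transforms):
    for trans in transforms:
        kwargs = dict(trans)                 # copy so the config stays untouched
        func_name = kwargs.pop("func_name")
        prob = kwargs.pop("prob", 1.0)       # prob defaults to 1, as noted below
        if random.random() < prob:
            image, labels = getattr(dataset, func_name)(image, labels, **kwargs)
    return image, labels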

+

Example enhanced with YOLOv7 training data: +

#File directory: configs/yolov7/hyp.scratch.tiny.yaml (https://github.com/mindspore-lab/mindyolo/blob/master/configs/yolov7/hyp.scratch.tiny.yaml)
+  train_transforms:
+    - {func_name: mosaic, prob: 1.0, mosaic9_prob: 0.2, translate: 0.1, scale: 0.5}
+    - {func_name: mixup, prob: 0.05, alpha: 8.0, beta: 8.0, needed_mosaic: True}
+    - {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}
+    - {func_name: pastein, prob: 0.05, num_sample: 30}
+    - {func_name: label_norm, xyxy2xywh_: True}
+    - {func_name: fliplr, prob: 0.5}
+    - {func_name: label_pad, padding_size: 160, padding_value: -1}
+    - {func_name: image_norm, scale: 255.}
+    - {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+
+Note: func_name represents the name of the data enhancement method, prob, mosaic9_prob, translate, and scale are the method parameters. Among them, prob is a parameter common to all methods, indicating the execution probability of the data enhancement method. The default value is 1

+

The specific operations performed by the above yaml file are as follows:

+
    +
  • +

    mosaic: Perform mosaic operation on the input image with a probability of 1.0, that is, splicing 4 different images into one image. mosaic9_prob represents the probability of splicing using the 9-square grid method, and translate and scale represent the degree of random translation and scaling respectively. +as the picture shows: +

    +
  • +
  • +

    mixup: Perform a mixup operation on the input image with a probability of 0.05, that is, mix two different images. Among them, alpha and beta represent the mixing coefficient, and needed_mosaic represents whether mosaic needs to be used for mixing.

    +
  • +
  • +

    hsv_augment: HSV augmentation; adjusts the HSV color space of the input image with a probability of 1.0 to increase data diversity. hgain, sgain and vgain control the degree of adjustment of the H, S and V channels respectively (a minimal sketch of this transform follows this list).

    +
  • +
  • +

    pastein: randomly paste some samples into the input image with a probability of 0.05. Among them, num_sample represents the number of randomly posted samples.

    +
  • +
  • +

    label_norm: Convert the input label from the format of (x1, y1, x2, y2) to the format of (x, y, w, h).

    +
  • +
  • +

    fliplr: Flip the input image horizontally with a probability of 0.5 to increase data diversity.

    +
  • +
  • +

    label_pad: Pad the input labels so that each image has the same number of labels. padding_size represents the number of labels after padding, and padding_value represents the value of padding.

    +
  • +
  • +

    image_norm: Scale the input image pixel value from the range [0, 255] to the range [0, 1].

    +
  • +
  • +

    image_transpose: Convert the input image from BGR format to RGB format, and convert the number of channels of the image from HWC format to CHW format.

    +
  • +
+
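As referenced in the hsv_augment item above, the following is a compact sketch of what such an HSV jitter can look like, written with OpenCV and NumPy in the style of common YOLO implementations; it is illustrative rather than the exact MindYOLO code.

# Sketch of an HSV color jitter similar to hsv_augment (illustrative, not the exact implementation).
import cv2
import numpy as np

def hsv_jitter(image, hgain=0.015, sgain=0.7, vgain=0.4):
    # Random gains around 1.0 for each of the H, S, V channels.
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(image.dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(image.dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(image.dtype)
    hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)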

Test data enhancement needs to be marked with the test_transforms field, and the configuration method is the same as training.

+

Custom data enhancement

+

Writing Guide:

+
    +
  • Add custom data enhancement methods to the COCODataset class in the mindyolo/data/dataset.py file
  • +
  • Inputs to data augmentation methods usually include images, labels, and custom parameters.
    +-Write function body content and customize output
  • +
+

A typical data enhancement method: +

#Add submethods in mindyolo/data/dataset.py COCODataset
+def data_trans_func(self, image, labels, args1=x1, args2=x2, ..., argsn=xn):
+    # Data enhancement logic
+    ...
+    return image, labels
+
+For example, a custom augmentation function that rotates the image by multiples of 90 degrees: +
#mindyolo/data/dataset.py
+def rotate(self, image, labels, angle):
+    # rotate image
+    image = np.rot90(image, angle // 90)
+    if len(labels):
+        if angle == 90:
+            labels[:, 0], labels[:, 1] = 1 - labels[:, 1], labels[:, 0]
+        elif angle == 180:
+            labels[:, 0], labels[:, 1] = 1 - labels[:, 0], 1 - labels[:, 1]
+        elif angle == 270:
+            labels[:, 0], labels[:, 1] = labels[:, 1], 1 - labels[:, 0]
+    return image, labels
+

+

Usage guide: +- Define this data augmentation method as a dictionary in the model's yaml file, with the same usage as described above: +

    - {func_name: rotate, angle: 90}
+

+

Show results:

+

     

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/deployment/index.html b/tutorials/deployment/index.html new file mode 100644 index 00000000..fe882ac8 --- /dev/null +++ b/tutorials/deployment/index.html @@ -0,0 +1,2068 @@ + + + + + + + + + + + + + + + + + + + + + + + + Deployment - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Deployment

+

Dependencies

+
pip install -r requirement.txt
+
+

MindSpore Lite environment preparation

+

Reference: Lite environment configuration
+ Note: MindSpore Lite is adapted to Python 3.7. Please prepare a Python 3.7 environment before installing Lite

+
    +
  1. +

    Depending on the environment, download the matching tar.gz package and whl package.

    +
  2. +
  3. +

    Unzip the tar.gz package and install the corresponding version of the whl package +

    tar -zxvf mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.tar.gz
    +pip install mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.whl
    +

    +
  4. +
  5. Configure Lite environment variables + LITE_HOME is the folder path extracted from tar.gz. It is recommended to use the absolute path. +
    export LITE_HOME=/path/to/mindspore-lite-{version}-{os}-{platform}
    +export LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH
    +export PATH=$LITE_HOME/tools/converter/converter:$LITE_HOME/tools/benchmark:$PATH
    +
  6. +
+

Quick Start

+

Model conversion

+

Convert the ckpt model to a MindIR model. This step can be run on CPU or Ascend 910. +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target [CPU/Ascend]
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+# Run on Ascend
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target Ascend
+

+

Lite Test

+
python deploy/test.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml
+e.g.
+python deploy/test.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml
+
+

Lite Predict

+
python ./deploy/predict.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+
+
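Under the hood, deploy/test.py and deploy/predict.py with --model_type Lite load the exported MindIR through the MindSpore Lite runtime. The following is a minimal sketch of that flow, assuming the MindSpore Lite 2.x Python API; the file name, target device and the 1x3x640x640 input shape are illustrative placeholders rather than fixed values:

import numpy as np
import mindspore_lite as mslite

# build a Lite context; the target can be ["ascend"] or ["cpu"] depending on the device
context = mslite.Context()
context.target = ["ascend"]

# load the exported MindIR model
model = mslite.Model()
model.build_from_file("yolov5n.mindir", mslite.ModelType.MINDIR, context)

# feed a preprocessed image batch (NCHW, float32) and run inference
img = np.zeros((1, 3, 640, 640), dtype=np.float32)
inputs = model.get_inputs()
inputs[0].set_data_from_numpy(img)
outputs = model.predict(inputs)
pred = outputs[0].get_data_to_numpy()  # raw predictions, still need decoding + NMS
print(pred.shape)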

Script description

+
    +
  • predict.py supports single image inference
  • +
  • test.py supports COCO data set inference
  • +
  • Note: currently only supports inference on Ascend 310
  • +
+

MindX Deployment

+

Environment configuration

+

Reference: MindX environment preparation
+Note: MindX currently supports python version 3.9. Please prepare the python3.9 environment before installing MindX

+
  1. Obtain the environment installation package from the MindX official website (https://www.hiascend.com/software/mindx-sdk/commercial). Currently, version 3.0.0 of MindX infer is supported.

  2. Go to the download page and download Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run.

  3. Place the installation package in a directory on the Ascend 310 machine and unzip it.

  4. If you are not the root user, add executable permission to the package:

     chmod +x Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run

  5. Enter the directory where the development kit package was uploaded and install the mxManufacture development kit:

     ./Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run --install

     If the following message appears after installation, the software was installed successfully:

     The installation is successful

     After installation, the mxManufacture software directory structure is as follows:

     .
     ├── bin
     ├── config
     ├── filelist.txt
     ├── include
     ├── lib
     ├── opensource
     ├── operators
     ├── python
     ├── samples
     ├── set_env.sh
     ├── toolkit
     └── version.info

  6. Enter the installation directory of mxManufacture and run the following command to make the MindX SDK environment variables take effect:

     source set_env.sh

  7. Enter ./mxVision-3.0.0/python/ and install mindx-3.0.0-py3-none-any.whl:

     pip install mindx-3.0.0-py3-none-any.whl

Model conversion

+
  1. Convert the ckpt model to an AIR model. This step needs to be performed on Ascend 910:

     python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format AIR
     e.g.
     python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format AIR

     Note: yolov7 needs to run export on an Ascend 910 machine with version 2.0 or above.

  2. Convert the AIR model to an OM model with the atc conversion tool. This step requires the MindX environment to be installed and runs on Ascend 310:

     atc --model=./path_to_air/weight.air --framework=1 --output=yolo --soc_version=Ascend310

MindX Test

+

Infer COCO data: +

python ./deploy/test.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml
+e.g.
+python ./deploy/test.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml
+

+

MindX Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+

+

MindIR Deployment

+

Environmental requirements

+

mindspore>=2.1

+

Precautions

+
  1. Currently only Predict is supported.

  2. In theory this can also run on Ascend 910, but it has not been tested.

Model conversion

+

Convert the ckpt model to a MindIR model. This step can be run on the CPU:

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+

+

MindIR Test

+

Coming soon

+

MindIR Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type MindIR --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type MindIR --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+
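With MindSpore >= 2.1 installed, the exported MindIR can also be loaded directly in Python. The following is a minimal sketch only; the file name and input shape are illustrative, and real inputs should come from the usual preprocessing pipeline:

import numpy as np
import mindspore as ms
from mindspore import nn

# load the exported MindIR graph and wrap it as a callable cell
graph = ms.load("yolov5n.mindir")
net = nn.GraphCell(graph)

# run a dummy preprocessed input (NCHW, float32)
x = ms.Tensor(np.zeros((1, 3, 640, 640), dtype=np.float32))
out = net(x)
print([o.shape for o in out] if isinstance(out, (list, tuple)) else out.shape)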

+

ONNX deployment

+

Environment configuration

+
pip install onnx>=1.9.0
+pip install onnxruntime>=1.8.0
+
+

Precautions

  1. Currently not all MindYOLO models support ONNX export and inference (only YOLOv3 is used as an example here).

  2. Currently only the Predict function is supported.

  3. Exporting to ONNX requires replacing the nn.SiLU operator with an implementation based on the underlying sigmoid operator.

For example, add the following custom layer and replace all occurrences of nn.SiLU in mindyolo:

class EdgeSiLU(nn.Cell):
+    """
+    SiLU activation function: x * sigmoid(x). To support for onnx export with nn.SiLU.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def construct(self, x):
+        return x * ops.sigmoid(x)
+

+
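One simple way to apply the replacement before export is to patch the attribute on mindspore.nn, so that every SiLU created afterwards is the ONNX-friendly version defined above. This is only a sketch of one possible approach; editing the model definitions directly works just as well:

import mindspore.nn as nn

# assumes EdgeSiLU (defined above) is importable in the current scope;
# patch the class globally before the network is built so that all
# subsequent nn.SiLU() instances are actually EdgeSiLU
nn.SiLU = EdgeSiLU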

Model conversion

+

Convert the ckpt model to an ONNX model. This step and the Test step can only be run on the CPU. +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format ONNX --device_target [CPU]
+e.g.
+#Run on CPU
+python ./deploy/export.py --config ./configs/yolov3/yolov3.yaml --weight yolov3-darknet53_300e_mAP455-adfb27af.ckpt --per_batch_size 1 --file_format ONNX --device_target CPU
+

+

ONNX Test

+

Coming soon

+

ONNXRuntime Predict

+

Infer a single image: +

python ./deploy/predict.py --model_type ONNX --model_path ./path_to_onnx_model/model.onnx --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type ONNX --model_path ./yolov3.onnx --config ./configs/yolov3/yolov3.yaml --image_path ./coco/image/val2017/image.jpg
+

+
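deploy/predict.py with --model_type ONNX runs the exported model through ONNX Runtime. A minimal sketch of that flow is shown below; the file name is illustrative, and the input name/shape are queried from the model instead of being hard-coded:

import numpy as np
import onnxruntime as ort

# create a CPU inference session for the exported model
session = ort.InferenceSession("yolov3.onnx", providers=["CPUExecutionProvider"])

# query the model input instead of hard-coding its name
inp = session.get_inputs()[0]
img = np.zeros((1, 3, 640, 640), dtype=np.float32)  # preprocessed image batch

# run inference; the outputs still need the usual decoding + NMS post-processing
outputs = session.run(None, {inp.name: img})
print([o.shape for o in outputs])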

Standard and supported model libraries

Name | Scale | Context | ImageSize | Dataset | Box mAP (%) | Params | FLOPs | Recipe | Download
YOLOv8 | N | D310x1-G | 640 | MS COCO 2017 | 37.2 | 3.2M | 8.7G | yaml | ckpt, mindir
YOLOv8 | S | D310x1-G | 640 | MS COCO 2017 | 44.6 | 11.2M | 28.6G | yaml | ckpt, mindir
YOLOv8 | M | D310x1-G | 640 | MS COCO 2017 | 50.5 | 25.9M | 78.9G | yaml | ckpt, mindir
YOLOv8 | L | D310x1-G | 640 | MS COCO 2017 | 52.8 | 43.7M | 165.2G | yaml | ckpt, mindir
YOLOv8 | X | D310x1-G | 640 | MS COCO 2017 | 53.7 | 68.2M | 257.8G | yaml | ckpt, mindir
YOLOv7 | Tiny | D310x1-G | 640 | MS COCO 2017 | 37.5 | 6.2M | 13.8G | yaml | ckpt, mindir
YOLOv7 | L | D310x1-G | 640 | MS COCO 2017 | 50.8 | 36.9M | 104.7G | yaml | ckpt, mindir
YOLOv7 | X | D310x1-G | 640 | MS COCO 2017 | 52.4 | 71.3M | 189.9G | yaml | ckpt, mindir
YOLOv5 | N | D310x1-G | 640 | MS COCO 2017 | 27.3 | 1.9M | 4.5G | yaml | ckpt, mindir
YOLOv5 | S | D310x1-G | 640 | MS COCO 2017 | 37.6 | 7.2M | 16.5G | yaml | ckpt, mindir
YOLOv5 | M | D310x1-G | 640 | MS COCO 2017 | 44.9 | 21.2M | 49.0G | yaml | ckpt, mindir
YOLOv5 | L | D310x1-G | 640 | MS COCO 2017 | 48.5 | 46.5M | 109.1G | yaml | ckpt, mindir
YOLOv5 | X | D310x1-G | 640 | MS COCO 2017 | 50.5 | 86.7M | 205.7G | yaml | ckpt, mindir
YOLOv4 | CSPDarknet53 | D310x1-G | 608 | MS COCO 2017 | 45.4 | 27.6M | 52G | yaml | ckpt, mindir
YOLOv4 | CSPDarknet53(silu) | D310x1-G | 640 | MS COCO 2017 | 45.8 | 27.6M | 52G | yaml | ckpt, mindir
YOLOv3 | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 45.5 | 61.9M | 156.4G | yaml | ckpt, mindir
YOLOX | N | D310x1-G | 416 | MS COCO 2017 | 24.1 | 0.9M | 1.1G | yaml | ckpt, mindir
YOLOX | Tiny | D310x1-G | 416 | MS COCO 2017 | 33.3 | 5.1M | 6.5G | yaml | ckpt, mindir
YOLOX | S | D310x1-G | 640 | MS COCO 2017 | 40.7 | 9.0M | 26.8G | yaml | ckpt, mindir
YOLOX | M | D310x1-G | 640 | MS COCO 2017 | 46.7 | 25.3M | 73.8G | yaml | ckpt, mindir
YOLOX | L | D310x1-G | 640 | MS COCO 2017 | 49.2 | 54.2M | 155.6G | yaml | ckpt, mindir
YOLOX | X | D310x1-G | 640 | MS COCO 2017 | 51.6 | 99.1M | 281.9G | yaml | ckpt, mindir
YOLOX | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 47.7 | 63.7M | 185.3G | yaml | ckpt, mindir


+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/finetune/index.html b/tutorials/finetune/index.html new file mode 100644 index 00000000..e7fc00ea --- /dev/null +++ b/tutorials/finetune/index.html @@ -0,0 +1,1381 @@ + + + + + + + + + + + + + + + + + + + + + + + + Finetune - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Fine-tuning

+

Custom Dataset Finetune Process

+

This article takes the Safety Hat Wearing Detection Dataset (SHWD) as an example to introduce the main process of finetune on MindYOLO with a custom data set.

+

Dataset Conversion

+

SHWD Dataset uses data labels in voc format, and its file directory is as follows: +

             Root directory
                ├── Annotations
                │   ├── 000000.xml
                │   └── 000002.xml
                ├── ImageSets
                │   └── Main
                │       ├── test.txt
                │       ├── train.txt
                │       ├── trainval.txt
                │       └── val.txt
                └── JPEGImages
                        ├── 000000.jpg
                        └── 000002.jpg
+
+The xml file under the Annotations folder contains annotation information for each picture. The main contents are as follows: +
<annotation>
+  <folder>JPEGImages</folder>
+  <filename>000377.jpg</filename>
+  <path>F:\baidu\VOC2028\JPEGImages\000377.jpg</path>
+  <source>
+    <database>Unknown</database>
+  </source>
+  <size>
+    <width>750</width>
+    <height>558</height>
+    <depth>3</depth>
+  </size>
+  <segmented>0</segmented>
+  <object>
+    <name>hat</name>
+    <pose>Unspecified</pose>
+    <truncated>0</truncated>
+    <difficult>0</difficult>
+    <bndbox>
+      <xmin>142</xmin>
+      <ymin>388</ymin>
+      <xmax>177</xmax>
+      <ymax>426</ymax>
+    </bndbox>
+  </object>
+
A file may contain multiple object entries. In each object, name is the category name, and xmin, ymin, xmax, and ymax are the pixel coordinates of the top-left and bottom-right corners of the bounding box.

+

The data set format supported by MindYOLO is YOLO format. For details, please refer to Data Preparation

+

Since MindYOLO uses the image name as image_id during the evaluation phase, image names must be numeric rather than strings, so the images need to be renamed. Converting the SHWD dataset to YOLO format therefore includes the following steps (a sketch of the xml-parsing step is given after the command below):
* Copy each image to the corresponding path and rename it
* Write the relative path of each image into the corresponding txt file in the root directory
* Parse the xml files and generate the corresponding txt annotation files under the corresponding path
* For the validation set, additionally generate the final json file

+

For detailed implementation, please refer to convert_shwd2yolo.py. The operation method is as follows:

+

python examples/finetune_SHWD/convert_shwd2yolo.py --root_dir /path_to_shwd/SHWD
+
+Running the above command will generate a SHWD data set in yolo format in the same directory without changing the original data set.

+
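The core of the xml-parsing step is converting each VOC bounding box (xmin, ymin, xmax, ymax in pixels) into a normalized "class x_center y_center width height" line. The snippet below is a simplified sketch of that conversion, assuming the two-class SHWD label order used in the yaml file further down; the helper name is illustrative and not part of convert_shwd2yolo.py:

import xml.etree.ElementTree as ET

CLASS_NAMES = ["person", "hat"]  # must match data.names in the yaml config

def voc_xml_to_yolo_lines(xml_path):
    root = ET.parse(xml_path).getroot()
    w = float(root.find("size/width").text)
    h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        cls_id = CLASS_NAMES.index(obj.find("name").text)
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        # normalized center coordinates and width/height, as expected by YOLO labels
        cx, cy = (xmin + xmax) / 2 / w, (ymin + ymax) / 2 / h
        bw, bh = (xmax - xmin) / w, (ymax - ymin) / h
        lines.append(f"{cls_id} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}")
    return lines

# one txt file per image, e.g. labels/train/000377.txt
print("\n".join(voc_xml_to_yolo_lines("Annotations/000377.xml")))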

Write yaml configuration file

+

The configuration file mainly contains parameters related to the dataset, data augmentation, loss, optimizer, and model structure. Since MindYOLO provides a yaml inheritance mechanism, you only need to write the parameters that have to be adjusted into yolov7-tiny_shwd.yaml and inherit the native yaml file provided by MindYOLO. Its content is as follows:

__BASE__: [
+  '../../configs/yolov7/yolov7-tiny.yaml',
+]
+
+per_batch_size: 16 # Single card batchsize, total batchsize=per_batch_size * device_num
+img_size: 640 # image sizes
+weight: ./yolov7-tiny_pretrain.ckpt
+strict_load: False # Whether to strictly load the internal parameters of ckpt. The default is True. If set to False, when the number of classifications is inconsistent, the weight of the last layer of classifiers will be discarded.
+log_interval: 10 #Print the loss result every log_interval iterations
+
+data:
+  dataset_name: shwd
+  train_set: ./SHWD/train.txt # Actual training data path
+  val_set: ./SHWD/val.txt
+  test_set: ./SHWD/val.txt
+  nc: 2 # Number of categories
+  # class names
+  names: [ 'person', 'hat' ] # The name of each category
+
+optimizer:
+  lr_init: 0.001 # initial learning rate
+
* __BASE__ is a list containing the paths of the inherited yaml files; multiple yaml files can be inherited.
* per_batch_size and img_size are the batch size on a single card and the image size used during data processing, respectively.
* weight is the file path of the pre-trained model mentioned above, and strict_load means discarding parameters with inconsistent shapes.
* log_interval is the log printing interval.
* All parameters under the data field are dataset-related: dataset_name is the name of the custom dataset; train_set, val_set, and test_set are the paths of the txt files that list the training, validation, and test images; nc is the number of categories; names are the category names.
* lr_init under the optimizer field is the initial learning rate after warm-up, here 10 times smaller than the default.

+

For parameter inheritance relationship and parameter description, please refer to Configuration.

+

Download pre-trained model

+

You can choose a pre-trained model from the Model Zoo provided by MindYOLO as the starting point for the custom dataset. A pre-trained model already performs well on the COCO dataset; compared with training from scratch, loading it generally gives faster convergence and higher final accuracy, and largely avoids problems such as vanishing or exploding gradients caused by poor initialization.

+

The number of categories in a custom dataset is usually different from that of the COCO dataset. Since the detection head of each MindYOLO model depends on the number of categories, directly loading the pre-trained weights may fail because of shape mismatches. Set the strict_load parameter to False in the yaml configuration file; MindYOLO will then automatically discard parameters with mismatched shapes and print a warning that those module parameters were not loaded.

+

Model fine-tuning (Finetune)

+

During model fine-tuning, you can first train with the default configuration. If the results are not good, consider adjusting the following parameters:
* Lower the learning rate if the loss has difficulty converging.
* Adjust per_batch_size according to the actual memory usage; generally, the larger per_batch_size is, the more accurate the gradient estimate.
* Adjust the number of epochs according to whether the loss has converged.
* Adjust the anchors according to the actual object sizes.

+

Since the SHWD training set only has about 6,000 images, the yolov7-tiny model was selected for training.
* Distributed training on multi-card NPU/GPU, taking 8 cards as an example:

+
mpirun --allow-run-as-root -n 8 python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --is_parallel True
+
+
    +
  • Train the model on a single card NPU/GPU/CPU:
  • +
+

python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml
+
Note: training on the SHWD dataset directly with the default yolov7-tiny parameters reaches an accuracy of 87.0 AP50, while changing lr_init from 0.01 to 0.001 raises this to 89.2 AP50.

+

Visualized inference

+

Use demo/predict.py to run visualized inference with the trained model as follows:

+

python demo/predict.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg
+
The inference result is shown below:

+
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/modelarts/index.html b/tutorials/modelarts/index.html new file mode 100644 index 00000000..4c51b413 --- /dev/null +++ b/tutorials/modelarts/index.html @@ -0,0 +1,1288 @@ + + + + + + + + + + + + + + + + + + + + + + + + CloudBrain - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

MindYOLO ModelArts Training Quick Start

+

This article introduces how to train MindYOLO using the ModelArts platform. For related ModelArts tutorials, see the Help Center.

+

Prepare data and code

+

Use the OBS service to upload datasets. For the related operations, see the OBS User Guide. To obtain the AK of the account (https://docs.xckpjs.com/zh-cn/browsertg/obs/obs_03_1007.html) and the server address, please consult the platform administrator or the person in charge of the account; if the AK is not in the location specified in the user guide, also consult them.
Steps:

+
  1. Log in to OBS Browser+.
  2. Create a bucket, then create a new folder in it (e.g. coco).
  3. Upload the data files. Place them in a separate folder (coco in this example): the code copies the data from the OBS bucket, and everything inside this folder is copied. Without a separate folder, the complete dataset cannot be selected.

Dataset

+

Prepare code

+

Also use the OBS service to upload the training code.
Steps: create a bucket -> create a new folder (e.g. mindyolo) -> upload the code files; create an output folder at the same level as mindyolo to store training records, and a log folder to store the logs.

+
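Putting the two preparation steps together, the OBS bucket ends up with a layout roughly like the following (names such as coco and mindyolo are just the examples used above):

obs://your-bucket/
    ├── coco/        # dataset folder, selected as the training input
    ├── mindyolo/    # training code, selected as the code directory
    ├── output/      # training records, selected as the training output
    └── log/         # job logs, selected as the job log path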

Create new algorithm

+
  1. In the ModelArts console, select Algorithm Management -> Create.
  2. Customize the algorithm name. For the prebuilt framework select Ascend-Powered-Engine; for the master branch select the MindSpore-2.0 image, and for the r0.1 branch select the MindSpore-1.8.1 image. Set the code directory, boot file, input, output, and hyperparameters.
  • If you need to load pre-trained weights, select the uploaded model file under model selection and add the ckpt_dir parameter to the running parameters.
  • The boot (startup) file is train.py.
  • The running hyperparameter enable_modelarts must be added, with the value True.
  • The running hyperparameter config refers to the directory shown in the runtime environment preview of the training job, e.g. /home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml
  • For distributed training scenarios, also add the hyperparameter is_parallel, set to True for distributed runs and False for single-card runs.
+
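Taken together, a typical set of running parameters for an 8-card yolov5n job looks roughly like the following; the values are the examples mentioned above, and the ckpt_dir entry is only needed when loading pre-trained weights:

enable_modelarts = True
is_parallel      = True
config           = /home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml
ckpt_dir         = <path of the uploaded pre-trained weights>   # optional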

Create new job

+
  1. In the ModelArts service, select Training Management -> Training Jobs -> Create Training Job. Set the job name and choose not to include it in an experiment; for Create Method choose My Algorithm and select the newly created algorithm.
  2. For Training Input -> Data storage location, select the OBS data bucket created earlier (coco in the example); for the training output select the output folder created when preparing the code; set the config hyperparameter value according to the runtime environment preview.
  3. Select the resource pool, specifications and number of compute nodes, and select the log folder created earlier as the job log path.
  4. Submit the training job; it starts running after queuing.

Modify job

+

Select Rebuild on the training job page to modify the selected job configuration.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/quick_start/index.html b/tutorials/quick_start/index.html new file mode 100644 index 00000000..66d40ab3 --- /dev/null +++ b/tutorials/quick_start/index.html @@ -0,0 +1,1323 @@ + + + + + + + + + + + + + + + + + + + + + + + + Quick Start - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Quick Start

+

Getting Started with MindYOLO

+

This document provides a brief introduction to the usage of built-in command-line tools in MindYOLO.

+

Inference Demo with Pre-trained Models

+
  1. Pick a model and its config file from the Model Zoo, for example ./configs/yolov7/yolov7.yaml.
  2. Download the corresponding pre-trained checkpoint from the Model Zoo page of that model.
  3. To run YOLO object detection with the built-in configs, run:
+
# Run with Ascend (By default)
+python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg
+
+# Run with GPU
+python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=GPU
+
+

For details of the command-line arguments, see demo/predict.py -h or look at its source code to understand their behavior. Some common notes:
* To run on CPU, set device_target to CPU.
* The results are saved in ./detect_results.

+

Training & Evaluation in Command Line

+
    +
  • Prepare your dataset in YOLO format. If training with COCO (in YOLO format), please prepare it from yolov5 or darknet; the expected layout and the content of the txt index files are shown below.
  • +
+
+ +
  coco/
+    {train,val}2017.txt
+    annotations/
+      instances_{train,val}2017.json
+    images/
+      {train,val}2017/
+          00000001.jpg
+          ...
+          # image files that are mentioned in the corresponding train/val2017.txt
+    labels/
+      {train,val}2017/
+          00000001.txt
+          ...
+          # label files that are mentioned in the corresponding train/val2017.txt
+
+
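Each line of {train,val}2017.txt is the path of one image relative to the dataset root, for example:

./images/train2017/00000001.jpg
./images/train2017/00000002.jpg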
+ +
    +
  • +

    To train a model on 8 NPUs/GPUs: +

    mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True
    +

    +
  • +
  • +

    To train a model on 1 NPU/GPU/CPU: +

    python train.py --config ./configs/yolov7/yolov7.yaml 
    +

    +
  • +
  • +

    To evaluate a model's performance on 1 NPU/GPU/CPU: +

    python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt
    +

    +
  • +
  • To evaluate a model's performance 8 NPUs/GPUs: +
    mpirun --allow-run-as-root -n 8 python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt --is_parallel True
    +
    +Notes:
  • +
+

(1) The default hyper-parameters are tuned for 8-card training; some parameters need to be adjusted for single-card training.

+

(2) The default device is Ascend, and you can modify it by specifying 'device_target' as Ascend/GPU/CPU, as these are currently supported.

+

(3) For more options, see train/test.py -h.

+

(4) To train on CloudBrain, see here

+

Deployment

+

See here.

+

To use MindYOLO APIs in Your Code

+

Coming soon.
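While the dedicated API guide is still being written, a rough sketch of programmatic use is already possible through the model factory, mirroring the model-writing guide elsewhere in these docs. In the sketch below the config path and the model name "yolov7" are assumptions (the name must correspond to a model registered with @register_model):

import numpy as np
import mindspore as ms
from mindspore import Tensor

from mindyolo.models.model_factory import create_model
from mindyolo.utils.config import parse_config

# parse_config() reads --config ./configs/yolov7/yolov7.yaml and other CLI options
opt = parse_config()

model = create_model(
    model_name="yolov7",          # assumed to be a registered model name
    model_cfg=opt.net,
    num_classes=opt.data.nc,
    sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
)

# run a dummy forward pass to check the output shapes
x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)
out = model(x)
out = out[0] if isinstance(out, (list, tuple)) else out
print([o.shape for o in out] if isinstance(out, (list, tuple)) else out.shape)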

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/how_to_guides/callback/index.html b/zh/how_to_guides/callback/index.html new file mode 100644 index 00000000..8ed505af --- /dev/null +++ b/zh/how_to_guides/callback/index.html @@ -0,0 +1,1174 @@ + + + + + + + + + + + + + + + + + + + + + + + + 回调函数用法 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

MindYOLO回调函数用法

+

回调函数:当程序运行到某个挂载点时,会自动调用在运行时注册到该挂载点的所有方法。 +通过回调函数的形式可以增加程序的灵活性和扩展性,因为用户可以将自定义方法注册到要调用的挂载点,而无需修改程序中的代码。

+

在MindYOLO中,回调函数具体实现在mindyolo/utils/callback.py文件中。 +

#mindyolo/utils/callback.py
+@CALLBACK_REGISTRY.registry_module()
+class callback_class_name(BaseCallback):
+
+    def __init__(self, **kwargs):
+        super().__init__()
+        ...
+    def callback_fn_name(self, run_context: RunContext):
+        pass
+

+

通过模型的yaml文件callback字段下添加一个字典列表来实现调用 +

#回调函数配置字典:
+callback:
+  - { name: callback_class_name, args: xx }
+  - { name: callback_class_name2, args: xx }
+
+例如以YOLOX为示例:

+

在mindyolo/utils/callback.py文件YoloxSwitchTrain类中on_train_step_begin方法里面添加逻辑,打印“train step begin”的日志 +

@CALLBACK_REGISTRY.registry_module()
+class YoloxSwitchTrain(BaseCallback):
+
+    def on_train_step_begin(self, run_context: RunContext):
+         # 自定义逻辑
+        logger.info("train step begin")
+        pass
+
+YOLOX对应的yaml文件configs/yolox/hyp.scratch.yaml的callback字段下添加该回调函数 +
callback:
+  - { name: YoloxSwitchTrain, switch_epoch_num: 285 }
+
+则每个训练step执行前都会执行logger.info("train step begin")语句。

+

借助回调函数,用户可以自定义某个挂载点需要执行的逻辑,而无需理解完整的训练流程的代码。

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/how_to_guides/data_preparation/index.html b/zh/how_to_guides/data_preparation/index.html new file mode 100644 index 00000000..d37898bc --- /dev/null +++ b/zh/how_to_guides/data_preparation/index.html @@ -0,0 +1,1230 @@ + + + + + + + + + + + + + + + + + + + + + + + + 数据准备 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

数据准备

+

数据集格式介绍

+

下载coco2017 YOLO格式 coco2017labels-segments 以及coco2017 原始图片 train2017 , val2017 ,然后将coco2017 原始图片放到coco2017 YOLO格式 images目录下: +

└─ coco2017_yolo
+    ├─ annotations
+        └─ instances_val2017.json
+    ├─ images
+        ├─ train2017   # coco2017 原始图片
+        └─ val2017     # coco2017 原始图片
+    ├─ labels
+        ├─ train2017
+        └─ val2017
+    ├─ train2017.txt
+    ├─ val2017.txt
+    └─ test-dev2017.txt
+
+其中train.txt文件每行对应单张图片的相对路径,例如: +
./images/train2017/00000000.jpg
+./images/train2017/00000001.jpg
+./images/train2017/00000002.jpg
+./images/train2017/00000003.jpg
+./images/train2017/00000004.jpg
+./images/train2017/00000005.jpg
+
+labels下的train2017文件夹下的txt文件为相应图片的标注信息,支持detect和segment两种格式。

+

detect格式:通常每行有5列,分别对应类别id以及标注框归一化之后的中心点坐标xy和宽高wh +

62 0.417040 0.206280 0.403600 0.412560
+62 0.818810 0.197933 0.174740 0.189680
+39 0.684540 0.277773 0.086240 0.358960
+0 0.620220 0.725853 0.751680 0.525840
+63 0.197190 0.364053 0.394380 0.669653
+39 0.932330 0.226240 0.034820 0.076640
+
+segment格式:每行第一个数据为类别id,后续为两两成对的归一化坐标点x,y

+

45 0.782016 0.986521 0.937078 0.874167 0.957297 0.782021 0.950562 0.739333 0.825844 0.561792 0.714609 0.420229 0.657297 0.391021 0.608422 0.4 0.0303438 0.750562 0.0016875 0.811229 0.003375 0.889896 0.0320156 0.986521
+45 0.557859 0.143813 0.487078 0.0314583 0.859547 0.00897917 0.985953 0.130333 0.984266 0.184271 0.930344 0.386521 0.80225 0.480896 0.763484 0.485396 0.684266 0.39775 0.670781 0.3955 0.679219 0.310104 0.642141 0.253937 0.561234 0.155063 0.559547 0.137083
+50 0.39 0.727063 0.418234 0.649417 0.455297 0.614125 0.476469 0.614125 0.51 0.590583 0.54 0.569417 0.575297 0.562354 0.601766 0.56 0.607062 0.536479 0.614125 0.522354 0.637063 0.501167 0.665297 0.48 0.69 0.477646 0.698828 0.494125 0.698828 0.534125 0.712938 0.529417 0.742938 0.548229 0.760594 0.564708 0.774703 0.550583 0.778234 0.536479 0.781766 0.531771 0.792359 0.541167 0.802937 0.555292 0.802937 0.569417 0.802937 0.576479 0.822359 0.576479 0.822359 0.597646 0.811766 0.607062 0.811766 0.618833 0.818828 0.637646 0.820594 0.656479 0.827641 0.687063 0.827641 0.703521 0.829406 0.727063 0.838234 0.708229 0.852359 0.729417 0.868234 0.750583 0.871766 0.792938 0.877063 0.821167 0.884125 0.861167 0.817062 0.92 0.734125 0.976479 0.711172 0.988229 0.48 0.988229 0.494125 0.967063 0.517062 0.912937 0.508234 0.832937 0.485297 0.788229 0.471172 0.774125 0.395297 0.729417
+45 0.375219 0.0678333 0.375219 0.0590833 0.386828 0.0503542 0.424156 0.0315208 0.440797 0.0281458 0.464 0.0389167 0.525531 0.115583 0.611797 0.222521 0.676359 0.306583 0.678875 0.317354 0.677359 0.385271 0.66475 0.394687 0.588594 0.407458 0.417094 0.517771 0.280906 0.604521 0.0806562 0.722208 0.0256719 0.763917 0.00296875 0.809646 0 0.786104 0 0.745083 0 0.612583 0.03525 0.613271 0.0877187 0.626708 0.130594 0.626708 0.170437 0.6025 0.273844 0.548708 0.338906 0.507 0.509906 0.4115 0.604734 0.359042 0.596156 0.338188 0.595141 0.306583 0.595141 0.291792 0.579516 0.213104 0.516969 0.129042 0.498297 0.100792 0.466516 0.0987708 0.448875 0.0786042 0.405484 0.0705208 0.375219 0.0678333 0.28675 0.108375 0.282719 0.123167 0.267078 0.162854 0.266062 0.189083 0.245391 0.199833 0.203516 0.251625 0.187375 0.269771 0.159641 0.240188 0.101125 0.249604 0 0.287271 0 0.250271 0 0.245563 0.0975938 0.202521 0.203516 0.145354 0.251953 0.123167 0.28675 0.108375
+49 0.587812 0.128229 0.612281 0.0965625 0.663391 0.0840833 0.690031 0.0908125 0.700109 0.10425 0.705859 0.133042 0.700109 0.143604 0.686422 0.146479 0.664828 0.153188 0.644672 0.157042 0.629563 0.175271 0.605797 0.181021 0.595 0.147437
+49 0.7405 0.178417 0.733719 0.173896 0.727781 0.162583 0.729484 0.150167 0.738812 0.124146 0.747281 0.0981458 0.776109 0.0811875 0.804094 0.0845833 0.814266 0.102667 0.818516 0.115104 0.812578 0.133208 0.782906 0.151292 0.754063 0.172771
+49 0.602656 0.178854 0.636125 0.167875 0.655172 0.165125 0.6665 0.162375 0.680391 0.155521 0.691719 0.153458 0.703047 0.154146 0.713859 0.162375 0.724156 0.174729 0.730844 0.193271 0.733422 0.217979 0.733938 0.244063 0.733422 0.281813 0.732391 0.295542 0.728266 0.300354 0.702016 0.294854 0.682969 0.28525 0.672156 0.270146
+49 0.716891 0.0519583 0.683766 0.0103958 0.611688 0.0051875 0.568828 0.116875 0.590266 0.15325 0.590266 0.116875 0.613641 0.0857083 0.631172 0.0857083 0.6565 0.083125 0.679875 0.0883125 0.691563 0.0961042 0.711031 0.0649375
+
+instances_val2017.json为coco格式的验证集标注,可直接调用coco api用于map的计算。

+

训练&推理时,需修改configs/coco.yaml中的train_set,val_set,test_set为真实数据路径

+

使用MindYOLO套件完成自定义数据集finetune的实际案例可参考 微调

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/how_to_guides/write_a_new_model/index.html b/zh/how_to_guides/write_a_new_model/index.html new file mode 100644 index 00000000..97d3443f --- /dev/null +++ b/zh/how_to_guides/write_a_new_model/index.html @@ -0,0 +1,1553 @@ + + + + + + + + + + + + + + + + + + + + + + + + 自定义模型 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

模型编写指南

+

本文档提供MindYOLO编写自定义模型的教程。
+分为三个部分:

+
    +
  • 模型定义:我们可以直接定义一个网络,也可以使用yaml文件方式定义一个网络。
  • +
  • 注册模型:可选,注册之后可以在create_model接口中使用文件名创建自定义的模型
  • +
  • 验证: 验证模型是否可运行
  • +
+

模型定义

+

1.直接使用python代码来编写网络

+

模块导入

+

导入MindSpore框架中的nn模块和ops模块,用于定义神经网络的组件和操作。 +

import mindspore.nn as nn
+import mindspore.ops.operations as ops
+

+

创建模型

+

定义了一个继承自nn.Cell的模型类MyModel。在构造函数__init__中,定义模型的各个组件:

+
class MyModel(nn.Cell):
+    def __init__(self):
+        super(MyModel, self).__init__()
+        #conv1是一个2D卷积层,输入通道数为3,输出通道数为16,卷积核大小为3x3,步长为1,填充为1。
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
+        #relu是一个ReLU激活函数操作。
+        self.relu = ops.ReLU()
+        #axpool是一个2D最大池化层,池化窗口大小为2x2,步长为2。
+        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
+        #conv2是另一个2D卷积层,输入通道数为16,输出通道数为32,卷积核大小为3x3,步长为1,填充为1。
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
+        #fc是一个全连接层,输入特征维度为32x8x8,输出特征维度为10。
+        self.fc = nn.Dense(32 * 8 * 8, 10)
+
+    #在construct方法中,定义了模型的前向传播过程。输入x经过卷积、激活函数、池化等操作后,通过展平操作将特征张量变为一维向量,然后通过全连接层得到最终的输出结果。    
+    def construct(self, x): 
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = x.view(x.shape[0], -1)
+        x = self.fc(x)
+        return x
+
+

创建模型实例

+

通过实例化MyModel类,创建一个模型实例model,后续可以使用该实例进行模型的训练和推理。 +

model = MyModel()
+

+

2.使用yaml文件编写网络

+

通常需要以下三个步骤:

+
    +
  • 新建一个mymodel.yaml文件
  • +
  • 新建对应的mymodel.py文件
  • +
  • 在mindyolo/models/init.py文件中引入该模型
  • +
+

以下是编写mymodel.yaml文件的详细指导:
+以编写一个简单网络为例: +以yaml格式编写必要参数,后续在mymodel.py文件里面可以用到这些参数。 +其中network部分为模型网络
+[[from, number, module, args], ...]:每个元素代表一个网络层的配置。
+

# __BASE__中的yaml表示用于继承的基础配置文件,重复的参数会被当前文件覆盖;
+__BASE__:
+  - '../coco.yaml'
+  - './hyp.scratch-high.yaml'
+
+per_batch_size: 32
+img_size: 640
+sync_bn: False
+
+network:
+  model_name: mymodel
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [ 8, 16, 32 ]
+
+  # 骨干网络部分的配置,每层的元素含义为
+  # [from, number, module, args]
+  # 以第一层为例,[-1, 1, ConvNormAct, [32, 3, 1]], 表示输入来自 `-1`(上一层) ,重复次数为 1,模块名为 ConvNormAct,模块输入参数为 [32, 3, 1];
+  backbone: 
+    [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+      ]
+
+  #head部分的配置 
+  head: 
+    [
+    [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ],  # 7-P4/16
+      [ -1, 8, Bottleneck, [ 512 ] ],
+      [ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ],  # 9-P5/32
+      [ -1, 4, Bottleneck, [ 1024 ] ],  # 10
+    ]
+

+

编写mymodel.py文件:

+

模块导入

+

需要导入套件内的模块。 如from .registry import register_model等等

+
import numpy as np
+
+import mindspore as ms
+from mindspore import Tensor, nn
+
+
+from .initializer import initialize_defult #用于初始化模型的默认参数,包括权重初始化方式、BN 层参数等。
+from .model_factory import build_model_from_cfg #用于根据 YAML 配置文件中的参数构建目标检测模型,并返回该模型的实例。
+from .registry import register_model #用于将自定义的模型注册到 Mindyolo 中,以便在 YAML 配置文件中使用。
+
+#可见性声明
+__all__ = ["MYmodel", "mymodel"]
+
+

创建配置字典

+

_cfg函数是一个辅助函数,用于创建配置字典。它接受一个url参数和其他关键字参数,并返回一个包含url和其他参数的字典。
+default_cfgs是一个字典,用于存储默认配置。在这里,mymodel作为键,使用_cfg函数创建了一个配置字典。 +

def _cfg(url="", **kwargs):
+    return {"url": url, **kwargs}
+
+default_cfgs = {"mymodel": _cfg(url="")}
+

+

创建模型

+

MindSpore中,模型的类继承于nn.Cell,一般来说需要重载以下两个函数:

+
    +
  • __init__函数中,应当定义模型中需要用到的module层。
  • +
  • construct函数中定义模型前向逻辑。
  • +
+
class MYmodel(nn.Cell):
+
+    def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False):
+        super(MYmodel, self).__init__()
+        self.cfg = cfg
+        self.stride = Tensor(np.array(cfg.stride), ms.int32)
+        self.stride_max = int(max(self.cfg.stride))
+        ch, nc = in_channels, num_classes
+
+        self.nc = nc  # override yaml value
+        self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn)
+        self.names = [str(i) for i in range(nc)]  # default names
+
+        initialize_defult()  # 可选,你可能需要initialize_defult方法以获得和pytorch一样的conv2d、dense层的初始化方式;
+
+    def construct(self, x):
+        return self.model(x)
+
+

注册模型(可选)

+

如果需要使用mindyolo接口初始化自定义的模型,那么需要先对模型进行**注册**和**导入**

+

模型注册
+

@register_model #注册后的模型可以通过 create_model 接口以模型名的方式进行访问;
+def mymodel(cfg, in_channels=3, num_classes=None, **kwargs) -> MYmodel:
+    """Get GoogLeNet model.
+    Refer to the base class `models.GoogLeNet` for more details."""
+    model = MYmodel(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+模型导入

+
#在mindyolo/models/_init_.py文件中添加以下代码
+
+from . import mymodel #mymodel.py文件通常放在mindyolo/models/目录下
+__all__.extend(mymodel.__all__)
+from .mymodel import *
+
+

验证main

+

初始编写阶段应当保证模型是可运行的。可通过下述代码块进行基础验证: +首先导入所需的模块和函数。然后,通过解析配置对象。

+

if __name__ == "__main__":
+    from mindyolo.models.model_factory import create_model
+    from mindyolo.utils.config import parse_config
+
+    opt = parse_config()
+
+创建模型并指定相关参数,注意:如果要在create_model中使用文件名创建自定义的模型,那么需要先使用注册器@register_model进行注册,请参见上文 注册模型(可选)部分内容 +
    model = create_model(
+        model_name="mymodel",
+        model_cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    ) 
+

+

否则,请使用import的方式引入模型

+

    from mindyolo.models.mymodel import MYmodel
+    model = MYmodel(
+        model_name="mymodel",
+        model_cfg=opt.net,
+        num_classes=opt.data.nc,
+        sync_bn=opt.sync_bn if hasattr(opt, "sync_bn") else False,
+    ) 
+
+最后,创建一个输入张量x并将其传递给模型进行前向计算。 +
    x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32)
+    out = model(x)
+    out = out[0] if isinstance(out, (list, tuple)) else out
+    print(f"Output shape is {[o.shape for o in out]}")
+

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/index.html b/zh/index.html new file mode 100644 index 00000000..c1b3c024 --- /dev/null +++ b/zh/index.html @@ -0,0 +1,1378 @@ + + + + + + + + + + + + + + + + + + + + + + 主页 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

MindYOLO

+

+ + docs + + + GitHub + + + PRs Welcome + +

+ +

MindYOLO基于mindspore实现了最新的YOLO系列算法。以下是mindyolo的分支与mindspore版本的对应关系:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
mindyolo | mindspore
master | master
0.4 | 2.3.0
0.3 | 2.2.10
0.2 | 2.0
0.1 | 1.8
+

+

模型仓库和基准

+

详见 模型仓库表格

+

支持模型列表

+ +

安装

+

详见 安装

+

快速开始

+

详见 快速开始

+

说明

+

⚠️ 当前版本基于 图模式静态shape开发。 +动态shape将在后续支持,敬请期待。

+

参与项目

+

为了让mindyolo更加完善和丰富,我们欢迎包括issue和pr在内的任何开源贡献。

+

请参考 参与项目 获取提供开源贡献的相关指导。

+

许可

+

MindYOLO基于 Apache License 2.0 发布。

+

须知

+

MindYOLO 是一个开源项目,我们欢迎任何贡献和反馈。我们希望该mindyolo能够通过提供灵活且标准化的工具包来支持不断壮大的研究社区,重现现有方法,并开发自己的新实时对象检测方法。

+

引用

+

如果您发现该项目对您的研究有用,请考虑引用:

+
@misc{MindSpore Object Detection YOLO 2023,
+    title={{MindSpore Object Detection YOLO}:MindSpore Object Detection YOLO Toolbox and Benchmark},
+    author={MindSpore YOLO Contributors},
+    howpublished = {\url{https://github.com/mindspore-lab/mindyolo}},
+    year={2023}
+}
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/installation/index.html b/zh/installation/index.html new file mode 100644 index 00000000..44731e61 --- /dev/null +++ b/zh/installation/index.html @@ -0,0 +1,1295 @@ + + + + + + + + + + + + + + + + + + + + + + + + 安装 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

安装

+

依赖

+
    +
  • mindspore >= 2.3
  • +
  • numpy >= 1.17.0
  • +
  • pyyaml >= 5.3
  • +
  • openmpi 4.0.3 (分布式训练所需)
  • +
+

如需安装python相关库依赖,只需运行:

+
pip install -r requirements.txt
+
+

如需安装MindSpore,你可以通过遵循官方指引,在不同的硬件平台上获得最优的安装体验。 为了在分布式模式下运行,您还需要安装OpenMPI

+

⚠️ 当前版本仅支持Ascend平台,GPU会在后续支持,敬请期待。

+

PyPI源安装

+

MindYOLO 现已发布为一个Python包并能够通过pip进行安装。我们推荐您在虚拟环境安装使用。 打开终端,输入以下指令来安装 MindYOLO:

+
pip install mindyolo
+
+

源码安装 (未经测试版本)

+

通过VSC安装

+
pip install git+https://github.com/mindspore-lab/mindyolo.git
+
+

通过本地src安装

+

由于本项目处于活跃开发阶段,如果您是开发者或者贡献者,请优先选择此安装方式。

+

MindYOLO 可以在由 GitHub 克隆仓库到本地文件夹后直接使用。 这对于想使用最新版本的开发者十分方便:

+
git clone https://github.com/mindspore-lab/mindyolo.git
+
+

在克隆到本地之后,推荐您使用"可编辑"模式进行安装,这有助于解决潜在的模块导入问题。

+
cd mindyolo
+pip install -e .
+
+

我们提供了一个可选的 fast coco api 接口用于提升验证过程的速度。代码是以C++形式提供的,可以尝试用以下的命令进行安装 (此操作是可选的) :

+
cd mindyolo/csrc
+sh build.sh
+
+

我们还提供了基于MindSpore Custom自定义算子 的GPU融合算子,用于提升训练过程的速度。代码采用C++和CUDA开发,位于examples/custom_gpu_op/路径下。您可参考示例脚本examples/custom_gpu_op/iou_loss_fused.py,修改mindyolo/models/losses/iou_loss.pybbox_iou方法,在GPU训练过程中使用该特性。运行iou_loss_fused.py前,需要使用以下的命令,编译生成GPU融合算子运行所依赖的动态库 (此操作并非必需) :

+
bash examples/custom_gpu_op/fused_op/build.sh
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/benchmark/index.html b/zh/modelzoo/benchmark/index.html new file mode 100644 index 00000000..1b8f50a8 --- /dev/null +++ b/zh/modelzoo/benchmark/index.html @@ -0,0 +1,1668 @@ + + + + + + + + + + + + + + + + + + + + + + + + Benchmark - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

模型仓库

+

检测任务

+
+performance tested on Ascend 910(8p) with graph mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.23.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.611.2Myamlweights
YOLOv8M16 * 8640MS COCO 201750.525.9Myamlweights
YOLOv8L16 * 8640MS COCO 201752.843.7Myamlweights
YOLOv8X16 * 8640MS COCO 201753.768.2Myamlweights
YOLOv7Tiny16 * 8640MS COCO 201737.56.2Myamlweights
YOLOv7L16 * 8640MS COCO 201750.836.9Myamlweights
YOLOv7X12 * 8640MS COCO 201752.471.3Myamlweights
YOLOv5N32 * 8640MS COCO 201727.31.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.67.2Myamlweights
YOLOv5M32 * 8640MS COCO 201744.921.2Myamlweights
YOLOv5L32 * 8640MS COCO 201748.546.5Myamlweights
YOLOv5X16 * 8640MS COCO 201750.586.7Myamlweights
YOLOv4CSPDarknet5316 * 8608MS COCO 201745.427.6Myamlweights
YOLOv4CSPDarknet53(silu)16 * 8608MS COCO 201745.827.6Myamlweights
YOLOv3Darknet5316 * 8640MS COCO 201745.561.9Myamlweights
YOLOXN8 * 8416MS COCO 201724.10.9Myamlweights
YOLOXTiny8 * 8416MS COCO 201733.35.1Myamlweights
YOLOXS8 * 8640MS COCO 201740.79.0Myamlweights
YOLOXM8 * 8640MS COCO 201746.725.3Myamlweights
YOLOXL8 * 8640MS COCO 201749.254.2Myamlweights
YOLOXX8 * 8640MS COCO 201751.699.1Myamlweights
YOLOXDarknet538 * 8640MS COCO 201747.763.7Myamlweights
+
+
+设备 Ascend 910*(8p) 测试结果 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv8N16 * 8640MS COCO 201737.3373.553.2Myamlweights
YOLOv8S16 * 8640MS COCO 201744.7365.5311.2Myamlweights
YOLOv7Tiny16 * 8640MS COCO 201737.5496.216.2Myamlweights
YOLOv5N32 * 8640MS COCO 201727.4736.081.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.6787.347.2Myamlweights
YOLOv4CSPDarknet5316 * 8608MS COCO 201746.1337.2527.6Myamlweights
YOLOv3Darknet5316 * 8640MS COCO 201746.6396.6061.9Myamlweights
YOLOXS8 * 8640MS COCO 201741.0242.159.0Myamlweights
+
+

图像分割

+
+设备 Ascend 910(8p) 测试结果 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)Mask mAP (%)ParamsRecipeDownload
YOLOv8-segX16 * 8640MS COCO 201752.542.971.8Myamlweights
+
+

部署

+ +

说明

+
    +
  • Box mAP:在验证集上计算的准确度。
  • +
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/yolov3/index.html b/zh/modelzoo/yolov3/index.html new file mode 100644 index 00000000..5101fbf3 --- /dev/null +++ b/zh/modelzoo/yolov3/index.html @@ -0,0 +1,1483 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv3 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv3

+
+

YOLOv3: An Incremental Improvement

+
+

摘要

+

我们对YOLO进行了一系列更新!它包含一堆小设计,可以使系统的性能得到更新。我们也训练了一个新的、比较大的神经网络。虽然比上一版更大一些,但是精度也提高了。不用担心,它的速度依然很快。YOLOv3在320×320输入图像上运行时只需22ms,并能达到28.2mAP,其精度和SSD相当,但速度要快上3倍。使用之前0.5 IOU mAP的检测指标,YOLOv3的效果是相当不错。YOLOv3使用Titan X GPU,其耗时51ms检测精度达到57.9 AP50,与RetinaNet相比,其精度只有57.5 AP50,但却耗时198ms,相同性能的条件下YOLOv3速度比RetinaNet快3.8倍。

+
+ +
+ +

结果

+
+使用图模式在 Ascend 910(8p) 上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv3Darknet5316 * 8640MS COCO 201745.561.9Myamlweights
+
+
+在Ascend 910*(8p)上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv3Darknet5316 * 8640MS COCO 201746.6396.6061.9Myamlweights
+
+


+

说明

+
    +
  • Box mAP:验证集上测试出的准确度。
  • +
  • 我们参考了常用的第三方 YOLOv3 的实现。
  • +
+

快速入门

+

详情请参阅 MindYOLO 中的 快速入门

+

训练

+

- 预训练模型

+

您可以从 此处 获取预训练模型。

+

要将其转换为 mindyolo 可加载的 ckpt 文件,请将其放在根目录中,然后运行以下语句: +

python mindyolo/utils/convert_weight_darknet53.py
+

+

- 分布式训练

+

使用预置的训练配方可以轻松重现报告的结果。如需在多台Ascend 910设备上进行分布式训练,请运行 +

# 在多台GPU/Ascend设备上进行分布式训练
+mpirun -n 8 python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --is_parallel True
+

+
+

如果脚本由root用户执行,则必须在mpirun中添加--allow-run-as-root参数。

+
+

同样的,您可以使用上述mpirun命令在多台GPU设备上训练模型。

+

有关所有超参数的详细说明,请参阅config.py

+

注意: 由于全局batch size(batch_size x 设备数)是一个重要的超参数,建议保持全局batch size不变进行复制,或者将学习率线性调整为新的全局batch size。

+

- 单卡训练

+

如果您想在较小的数据集上训练或微调模型而不进行分布式训练,请运行:

+
# 在 CPU/GPU/Ascend 设备上进行单卡训练
+python train.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend
+
+

验证和测试

+

要验证训练模型的准确性,您可以使用 test.py 并使用 --weight 传入权重路径。

+
python test.py --config ./configs/yolov3/yolov3.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

部署

+

详见 部署

+

引用

+ +

[1] Jocher Glenn. YOLOv3 release v9.1. https://github.com/ultralytics/yolov3/releases/tag/v9.1, 2021. +[2] Joseph Redmon and Ali Farhadi. YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767, 2018.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/yolov4/index.html b/zh/modelzoo/yolov4/index.html new file mode 100644 index 00000000..ecc56ca5 --- /dev/null +++ b/zh/modelzoo/yolov4/index.html @@ -0,0 +1,1518 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv4 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv4

+
+

YOLOv4: Optimal Speed and Accuracy of Object Detection

+
+

摘要

+

目前有很多可以提高CNN准确性的算法。这些算法的组合在庞大数据集上进行测试、对实验结果进行理论验证都是非常必要的。 +有些算法只在特定的模型上有效果,并且只对特定的问题有效,或者只对小规模的数据集有效; +然而有些算法,比如batch-normalization和residual-connections,对大多数的模型、任务和数据集都适用。 +我们认为这样通用的算法包括:Weighted-Residual-Connections(WRC), Cross-Stage-Partial-connections(CSP), +Cross mini-Batch Normalization(CmBN), Self-adversarial-training(SAT)以及Mish-activation。 +我们使用了新的算法:WRC, CSP, CmBN, SAT, Mish activation, Mosaic data augmentation, CmBN, Dropblock regularization 和CIoU loss以及它们的组合, +获得了最优的效果:在MS COCO数据集上的AP值为43.5%(65.7% AP50),在Tesla V100上的实时推理速度为65FPS。

+
+ +
+ +

结果

+
+使用图模式在 Ascend 910(8p) 上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv4CSPDarknet5316 * 8608MS COCO 201745.427.6Myamlweights
YOLOv4CSPDarknet53(silu)16 * 8608MS COCO 201745.827.6Myamlweights
+
+
+在Ascend 910*(8p)上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv4CSPDarknet5316 * 8608MS COCO 201746.1337.2527.6Myamlweights
+
+


+

说明

+
    +
  • Box mAP: 验证集上测试出的准确度。
  • +
+

快速入门

+

详情请参阅 MindYOLO 中的 快速入门

+

训练

+

- 预训练模型

+

您可以从 此处 获取预训练模型。

+

要将其转换为 mindyolo 可加载的 ckpt 文件,请将其放在根目录中,然后运行以下语句: +

python mindyolo/utils/convert_weight_cspdarknet53.py
+

+

- 分布式训练

+

使用预置的训练配方可以轻松重现报告的结果。如需在多台Ascend 910设备上进行分布式训练,请运行 +

# distributed training on multiple GPU/Ascend devices
+mpirun -n 8 python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --is_parallel True --epochs 320
+

+
+

如果脚本由root用户执行,则必须在mpirun中添加--allow-run-as-root参数。

+
+

同样的,您可以使用上述mpirun命令在多台GPU设备上训练模型。

+

有关所有超参数的详细说明,请参阅config.py

+

说明

+
    +
  • 由于全局batch size(batch_size x 设备数)是一个重要的超参数,建议保持全局batch size不变进行复制,或者将学习率线性调整为新的全局batch size。
  • +
  • 如果出现以下警告,可以通过设置环境变量 PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' 来修复。 +
    multiprocessing/semaphore_tracker.py: 144 UserWarning: semaphore_tracker: There appear to be 235 leaked semaphores to clean up at shutdown len(cache))
    +
  • +
+

- 单卡训练

+

如果您想在较小的数据集上训练或微调模型而不进行分布式训练,请运行:

+
# 在 CPU/GPU/Ascend 设备上进行单卡训练
+python train.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --epochs 320
+
+

验证和测试

+

要验证训练模型的准确性,您可以使用 test.py 并使用 --weight 传入权重路径。

+
python test.py --config ./configs/yolov4/yolov4-silu.yaml --device_target Ascend --iou_thres 0.6 --weight /PATH/TO/WEIGHT.ckpt
+
+

部署

+

详见 部署.

+

引用

+ +

[1] Alexey Bochkovskiy, Chien-Yao Wang and Ali Farhadi. YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv preprint arXiv:2004.10934, 2020.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/yolov5/index.html b/zh/modelzoo/yolov5/index.html new file mode 100644 index 00000000..1e9267a1 --- /dev/null +++ b/zh/modelzoo/yolov5/index.html @@ -0,0 +1,1516 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv5 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv5

+

摘要

+

YOLOv5 是在 COCO 数据集上预训练的一系列对象检测架构和模型,代表了 Ultralytics 对未来视觉 AI 方法的开源研究,融合了数千小时的研究和开发中积累的经验教训和最佳实践。

+
+ +
+ +

结果

+
+使用图模式在 Ascend 910(8p) 上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv5N32 * 8640MS COCO 201727.31.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.67.2Myamlweights
YOLOv5M32 * 8640MS COCO 201744.921.2Myamlweights
YOLOv5L32 * 8640MS COCO 201748.546.5Myamlweights
YOLOv5X16 * 8640MS COCO 201750.586.7Myamlweights
+
+
+在Ascend 910*(8p)上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv5N32 * 8640MS COCO 201727.4736.081.9Myamlweights
YOLOv5S32 * 8640MS COCO 201737.6787.347.2Myamlweights
+
+


+

说明

+
    +
  • Box mAP:验证集上测试出的准确度。
  • +
  • 我们参考了常用的第三方 YOLOV5 重现了P5(大目标)系列模型,并做出了如下改动:与官方代码有所不同,我们使用了8x NPU(Ascend910)进行训练,单NPU的batch size为32。
  • +
+

快速入门

+

详情请参阅 MindYOLO 中的 快速入门

+

训练

+

- 分布式训练

+

使用预置的训练配方可以轻松重现报告的结果。如需在多台Ascend 910设备上进行分布式训练,请运行 +

# 在多台GPU/Ascend设备上进行分布式训练
+mpirun -n 8 python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --is_parallel True
+

+
+

如果脚本由root用户执行,则必须在mpirun中添加--allow-run-as-root参数。

+
+

同样的,您可以使用上述mpirun命令在多台GPU设备上训练模型。

+

有关所有超参数的详细说明,请参阅config.py

+

注意: 由于全局batch size(batch_size x 设备数)是一个重要的超参数,建议保持全局batch size不变进行复制,或者将学习率线性调整为新的全局batch size。

+

- 单卡训练

+

如果您想在较小的数据集上训练或微调模型而不进行分布式训练,请运行:

+
# 在 CPU/GPU/Ascend 设备上进行单卡训练
+python train.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend
+
+

验证和测试

+

要验证训练模型的准确性,您可以使用 test.py 并使用 --weight 传入权重路径。

+
python test.py --config ./configs/yolov5/yolov5n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt
+
+

部署

+

详见 部署

+

引用

+ +

[1] Jocher Glenn. YOLOv5 release v6.1. https://github.com/ultralytics/yolov5/releases/tag/v6.1, 2022.

+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/yolov7/index.html b/zh/modelzoo/yolov7/index.html new file mode 100644 index 00000000..34007376 --- /dev/null +++ b/zh/modelzoo/yolov7/index.html @@ -0,0 +1,1486 @@ + + + + + + + + + + + + + + + + + + + + + + + + YOLOv7 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

YOLOv7

+
+

YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors

+
+

摘要

+

YOLOv7在5FPS到 160 FPS 范围内的速度和准确度都超过了所有已知的物体检测器,YOLOv7 在 5 FPS 到 160 FPS 范围内的速度和准确度都超过了所有已知的目标检测器,并且在 GPU V100 上 30 FPS 或更高的所有已知实时目标检测器中具有最高的准确度 56.8% AP。YOLOv7-E6 目标检测器(56 FPS V100,55.9% AP)比基于transformer-based的检测器 SWINL Cascade-Mask R-CNN(9.2 FPS A100,53.9% AP)的速度和准确度分别高出 509% 和 2%,以及基于卷积的检测器 ConvNeXt-XL Cascade-Mask R-CNN (8.6 FPS A100, 55.2% AP) 速度提高 551%,准确率提高 0.7%,以及 YOLOv7 的表现优于:YOLOR、YOLOX、Scaled-YOLOv4、YOLOv5、DETR、Deformable DETR , DINO-5scale-R50, ViT-Adapter-B 和许多其他物体探测器在速度和准确度上。 此外,我们只在 MS COCO 数据集上从头开始训练 YOLOv7,而不使用任何其他数据集或预训练的权重。

+
+ +
+ +

结果

+
+使用图模式在 Ascend 910(8p) 上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ParamsRecipeDownload
YOLOv7Tiny16 * 8640MS COCO 201737.56.2Myamlweights
YOLOv7L16 * 8640MS COCO 201750.836.9Myamlweights
YOLOv7X12 * 8640MS COCO 201752.471.3Myamlweights
+
+
+在Ascend 910*(8p)上测试的表现 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameScaleBatchSizeImageSizeDatasetBox mAP (%)ms/stepParamsRecipeDownload
YOLOv7Tiny16 * 8640MS COCO 201737.5496.216.2Myamlweights
+
+


+

说明

+
    +
  • Context:训练上下文,表示为{设备}x{设备数}-{mindspore模式},其中mindspore模式可以是G-图模式或F-pynative模式。例如,D910x8-G用于在8块Ascend 910 NPU上使用graph模式进行训练。
  • +
  • Box mAP:验证集上测试出的准确度。
  • +
  • 我们参考了常用的第三方 YOLOV7 重现了P5(大目标)系列模型,并做出了如下改动:与官方代码有所不同,我们使用了8x NPU(Ascend910)进行训练,tiny/l/x单NPU的batch size分别为16/16/12。
  • +
+

快速入门

+

详情请参阅 MindYOLO 中的 快速入门

+

训练

+

- 分布式训练

+

使用预置的训练配方可以轻松重现报告的结果。如需在多台Ascend 910设备上进行分布式训练,请运行 +

# 在多台GPU/Ascend设备上进行分布式训练
+mpirun -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --is_parallel True
+

+
+

如果脚本由root用户执行,则必须在mpirun中添加--allow-run-as-root参数。

+
+

同样的,您可以使用上述mpirun命令在多台GPU设备上训练模型。

+

有关所有超参数的详细说明,请参阅config.py

+

注意: 由于全局batch size(batch_size x 设备数)是一个重要的超参数,建议保持全局batch size不变进行复制,或者将学习率线性调整为新的全局batch size。

+

- 单卡训练

+

如果您想在较小的数据集上训练或微调模型而不进行分布式训练,请运行:

+
# standalone training on a CPU/GPU/Ascend device
python train.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend

Validation and Testing

To validate the accuracy of a trained model, you can use test.py and pass in the checkpoint path with --weight.
python test.py --config ./configs/yolov7/yolov7.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt

Deployment

See Deployment for details.

References

[1] Chien-Yao Wang, Alexey Bochkovskiy, and HongYuan Mark Liao. Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. arXiv preprint arXiv:2207.02696, 2022.

--- New file: zh/modelzoo/yolov8/index.html ("YOLOv8 - MindYOLO Docs") ---

YOLOv8

Abstract

Ultralytics YOLOv8, developed by Ultralytics, is a cutting-edge, state-of-the-art (SOTA) model that builds on the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection, image segmentation, and image classification tasks.

Results

Image Detection

Performance tested on Ascend 910 (8p) in graph mode:

| Name   | Scale | BatchSize | ImageSize | Dataset      | Box mAP (%) | Params | Recipe | Download |
| ------ | ----- | --------- | --------- | ------------ | ----------- | ------ | ------ | -------- |
| YOLOv8 | N     | 16 * 8    | 640       | MS COCO 2017 | 37.2        | 3.2M   | yaml   | weights  |
| YOLOv8 | S     | 16 * 8    | 640       | MS COCO 2017 | 44.6        | 11.2M  | yaml   | weights  |
| YOLOv8 | M     | 16 * 8    | 640       | MS COCO 2017 | 50.5        | 25.9M  | yaml   | weights  |
| YOLOv8 | L     | 16 * 8    | 640       | MS COCO 2017 | 52.8        | 43.7M  | yaml   | weights  |
| YOLOv8 | X     | 16 * 8    | 640       | MS COCO 2017 | 53.7        | 68.2M  | yaml   | weights  |

Performance tested on Ascend 910* (8p):

| Name   | Scale | BatchSize | ImageSize | Dataset      | Box mAP (%) | ms/step | Params | Recipe | Download |
| ------ | ----- | --------- | --------- | ------------ | ----------- | ------- | ------ | ------ | -------- |
| YOLOv8 | N     | 16 * 8    | 640       | MS COCO 2017 | 37.3        | 373.55  | 3.2M   | yaml   | weights  |
| YOLOv8 | S     | 16 * 8    | 640       | MS COCO 2017 | 44.7        | 365.53  | 11.2M  | yaml   | weights  |

Image Segmentation

Performance tested on Ascend 910 (8p) in graph mode:

| Name       | Scale | BatchSize | ImageSize | Dataset      | Box mAP (%) | Mask mAP (%) | Params | Recipe | Download |
| ---------- | ----- | --------- | --------- | ------------ | ----------- | ------------ | ------ | ------ | -------- |
| YOLOv8-seg | X     | 16 * 8    | 640       | MS COCO 2017 | 52.5        | 42.9         | 71.8M  | yaml   | weights  |

Notes

• Box mAP: accuracy measured on the validation set.
• We reproduced the P5 (large object) series models with reference to the widely used third-party YOLOv8 implementation.

Quick Start

See Quick Start in MindYOLO for details.

Training

- Distributed training

The reported results can easily be reproduced with the preset training recipes. For distributed training on multiple Ascend 910 devices, run:

# distributed training on multiple GPU/Ascend devices
mpirun -n 8 python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --is_parallel True

If the script is executed by the root user, the --allow-run-as-root argument must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed descriptions of all hyperparameters, please refer to config.py.

Note: As the global batch size (batch_size x number of devices) is an important hyperparameter, it is recommended to keep the global batch size unchanged for reproduction, or to scale the learning rate linearly to the new global batch size.

- Standalone training

If you want to train or fine-tune the model on a smaller dataset without distributed training, run:
# standalone training on a CPU/GPU/Ascend device
python train.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend

Validation and Testing

To validate the accuracy of a trained model, you can use test.py and pass in the checkpoint path with --weight.
python test.py --config ./configs/yolov8/yolov8n.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt

Deployment

See Deployment for details.

References

[1] Jocher Glenn. Ultralytics YOLOv8. https://github.com/ultralytics/ultralytics, 2023.

--- New file: zh/modelzoo/yolox/index.html ("YOLOx - MindYOLO Docs") ---

YOLOX

Abstract

YOLOX is a new high-performance detector that adds a number of proven improvements on top of the YOLO series. We switch the YOLO detector to an anchor-free design and adopt other advanced detection techniques, such as a decoupled head and the leading label assignment strategy SimOTA, to achieve state-of-the-art results across a large range of model scales: for YOLO-Nano, with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L, with roughly the same number of parameters as YOLOv4-CSP and YOLOv5-L, we achieve 50.0% AP on COCO at 68.9 FPS on a Tesla V100, exceeding YOLOv5-L by 1.8% AP. In addition, we won first place in the Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) with a single YOLOX-L model.

Results

Performance tested on Ascend 910 (8p) in graph mode:

| Name  | Scale     | BatchSize | ImageSize | Dataset      | Box mAP (%) | Params | Recipe | Download |
| ----- | --------- | --------- | --------- | ------------ | ----------- | ------ | ------ | -------- |
| YOLOX | N         | 8 * 8     | 416       | MS COCO 2017 | 24.1        | 0.9M   | yaml   | weights  |
| YOLOX | Tiny      | 8 * 8     | 416       | MS COCO 2017 | 33.3        | 5.1M   | yaml   | weights  |
| YOLOX | S         | 8 * 8     | 640       | MS COCO 2017 | 40.7        | 9.0M   | yaml   | weights  |
| YOLOX | M         | 8 * 8     | 640       | MS COCO 2017 | 46.7        | 25.3M  | yaml   | weights  |
| YOLOX | L         | 8 * 8     | 640       | MS COCO 2017 | 49.2        | 54.2M  | yaml   | weights  |
| YOLOX | X         | 8 * 8     | 640       | MS COCO 2017 | 51.6        | 99.1M  | yaml   | weights  |
| YOLOX | Darknet53 | 8 * 8     | 640       | MS COCO 2017 | 47.7        | 63.7M  | yaml   | weights  |

Performance tested on Ascend 910* (8p):

| Name  | Scale | BatchSize | ImageSize | Dataset      | Box mAP (%) | ms/step | Params | Recipe | Download |
| ----- | ----- | --------- | --------- | ------------ | ----------- | ------- | ------ | ------ | -------- |
| YOLOX | S     | 8 * 8     | 640       | MS COCO 2017 | 41.0        | 242.15  | 9.0M   | yaml   | weights  |


+

Notes

• Box mAP: accuracy measured on the validation set.
• We reproduced the results with reference to the official YOLOX implementation.

Quick Start

See Quick Start in MindYOLO for details.

Training

- Distributed training

The reported results can easily be reproduced with the preset training recipes. For distributed training on multiple Ascend 910 devices, run:

# distributed training on multiple GPU/Ascend devices
mpirun -n 8 python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --is_parallel True

If the script is executed by the root user, the --allow-run-as-root argument must be added to mpirun.

Similarly, you can train the model on multiple GPU devices with the above mpirun command.

For detailed descriptions of all hyperparameters, please refer to config.py.

Note: As the global batch size (batch_size x number of devices) is an important hyperparameter, it is recommended to keep the global batch size unchanged for reproduction, or to scale the learning rate linearly to the new global batch size.

- Standalone training

If you want to train or fine-tune the model on a smaller dataset without distributed training, run:
# standalone training on a CPU/GPU/Ascend device
python train.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend

Validation and Testing

To validate the accuracy of a trained model, you can use test.py and pass in the checkpoint path with --weight.
python test.py --config ./configs/yolox/yolox-s.yaml --device_target Ascend --weight /PATH/TO/WEIGHT.ckpt

Deployment

See Deployment for details.

References

[1] Zheng Ge. YOLOX: Exceeding YOLO Series in 2021. https://arxiv.org/abs/2107.08430, 2021.

--- New file: zh/notes/changelog/index.html ("Changelog - MindYOLO Docs") ---

Changelog

Coming soon

--- New file: zh/notes/code_of_conduct/index.html ("Code of Conduct - MindYOLO Docs") ---

Code of Conduct

Coming soon

--- New file: zh/notes/contributing/index.html ("Contributing - MindYOLO Docs") ---

MindYOLO Contribution Guide

Contributor License Agreement

Before submitting code to the MindYOLO community for the first time, you need to sign the CLA.

Individual contributors should refer to the ICLA online document for details.

Getting Started

Contribution Workflow

Code Style

Please follow this style so that MindYOLO remains easy to review, maintain, and develop.

• Coding guidelines

The MindYOLO community uses the Python coding style suggested by Python PEP 8 and the C++ coding style suggested by the Google C++ Coding Guidelines. CppLint, CppCheck, CMakeLint, CodeSpell, Lizard, ShellCheck, and PyLint are used to check the code format; installing these plugins in your IDE is recommended.

• Unit test guidelines

The MindYOLO community uses the Python unit test style suggested by pytest and the C++ unit test style suggested by Googletest Primer. The design intent of a test case should be reflected in its name and comments.
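For instance, a minimal pytest-style sketch is shown below; the helper function is defined locally purely for illustration and is not a MindYOLO API.

```python
# test_box_utils.py -- a minimal pytest-style example.
# The helper xywh_to_xyxy is defined here for illustration only;
# it is not an actual MindYOLO API.
import numpy as np


def xywh_to_xyxy(box):
    """Convert an (x_center, y_center, w, h) box to (x1, y1, x2, y2)."""
    x, y, w, h = box
    return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2])


def test_xywh_to_xyxy_preserves_area():
    # Design intent: changing the box representation must not change its area.
    box = np.array([10.0, 10.0, 4.0, 6.0])
    x1, y1, x2, y2 = xywh_to_xyxy(box)
    assert np.isclose((x2 - x1) * (y2 - y1), box[2] * box[3])
```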

• Refactoring guidelines

We encourage developers to refactor our code to eliminate code smells. All code must meet the coding style and test style requirements, and refactored code is no exception. Lizard sets a threshold of 100 for nloc (lines of code without comments) and 20 for cnc (cyclomatic complexity number); when you receive a Lizard warning, you must refactor the code you want to merge.

• Documentation guidelines

We use MarkdownLint to check the format of markdown documents. MindYOLO CI modifies the following rules based on the default configuration:

• MD007 (unordered list indentation): the indent parameter is set to 4, meaning all content in unordered lists is indented with four spaces.
• MD009 (trailing spaces): the br_spaces parameter is set to 2, meaning a line may end with 0 or 2 spaces.
• MD029 (ordered list numbering): the style parameter is set to ordered, meaning ordered list numbers are in ascending order.

See RULES for details.

Fork-Pull Development Model

• Fork the MindYOLO repository

Before submitting code to the MindYOLO project, make sure the project has been forked into your own repository. The MindYOLO repository and your own repository will then evolve in parallel, so be careful to avoid inconsistencies between the two.

• Clone the remote repository

If you want to download the code to your local machine, git is the best way:

# for GitHub
git clone https://github.com/{insert_your_forked_repo}/mindyolo.git
git remote add upper https://github.com/mindspore-lab/mindyolo.git
• Develop code locally

To avoid inconsistencies between multiple branches, it is SUGGESTED to check out a new branch:

git checkout -b {new_branch_name} origin/master

Taking the master branch as an example, MindYOLO may create version branches and downstream development branches as needed; please fix upstream bugs first. After that, you can change the code as you wish.

• Push the code to the remote repository

After updating the code, push the update in a formal way:

git add .
git status # check the update status
git commit -m "your commit title"
git commit -s --amend # add a detailed description of the commit
git push origin {new_branch_name}
• Open a pull request to the MindYOLO repository

In the last step, compare your new branch with the MindYOLO master branch. Once the pull request is created, Jenkins CI is automatically set up for build testing. Your pull request should be merged into the upstream master branch as soon as possible to reduce the risk of merge conflicts.

Reporting Issues

A great way to contribute to the project is to send a detailed report when you run into a problem. We always appreciate a well-written, thorough bug report and will thank you for it!

When reporting an issue, please use the following format:

• Which version of the environment (MindSpore, OS, Python, MindYOLO, etc.) are you using?
• Is this a bug report or a feature request?
• What kind of issue is it? Please add labels to highlight it on the issue dashboard.
• What happened?
• What did you expect to happen?
• How can it be reproduced? (as short and precise as possible)
• Any special notes for the reviewers?

Notes on issues:

• If you find an open issue that is exactly the problem you are going to solve, please leave a comment on that issue to let others know you will take care of it.
• If an issue has been open for a while, contributors are advised to do a pre-check before working on it.
• If you solve an issue that you reported yourself, you also need to let others know before closing it.
• If you want the issue to be answered as quickly as possible, try labeling it; you can find the various labels on the label list.

Proposing PRs

• Raise your idea as an issue on GitHub.
• If it is a new feature that needs a lot of design detail, a design proposal should also be submitted.
• After reaching consensus in the issue discussion and design proposal review, complete the development in your forked repository and submit a PR.
• No PR is allowed in until it receives 2+ LGTM from approvers. Note that approvers must not add an LGTM to their own PR.
• After the PR has been sufficiently discussed, it will be merged, abandoned, or rejected according to the outcome of the discussion.

PR recommendations:

• Avoid any unrelated changes.
• Keep your commit history tidy.
• Always keep your branch in sync with the master branch.
• For bug-fix PRs, make sure all related issues are linked.
--- New file: zh/notes/faq/index.html ("FAQ - MindYOLO Docs") ---

FAQ

Coming soon

--- New file: zh/reference/data/index.html ("Data - MindYOLO Docs") ---

Data

Data Loading

mindyolo.data.loader.create_loader(dataset, batch_collate_fn, column_names_getitem, column_names_collate, batch_size, epoch_size=1, rank=0, rank_size=1, num_parallel_workers=8, shuffle=True, drop_remainder=False, python_multiprocessing=False)

Creates dataloader.

Applies operations such as transform and batch to the ms.dataset.Dataset object created by the create_dataset function to get the dataloader.

Parameters:

| Parameter              | Description                                                                                                                                                                                                                                            | Type            | Default  |
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------- | -------- |
| dataset                | dataset object created by create_dataset.                                                                                                                                                                                                              | COCODataset     | required |
| batch_size             | The number of rows each batch is created with. An int or callable object which takes exactly 1 parameter, BatchInfo.                                                                                                                                   | int or function | required |
| drop_remainder         | Determines whether to drop the last block whose data row number is less than batch size. If True, and if there are less than batch_size rows available to make the last batch, then those rows will be dropped and not propagated to the child node.   | bool            | False    |
| num_parallel_workers   | Number of workers (threads) to process the dataset in parallel.                                                                                                                                                                                        | int             | 8        |
| python_multiprocessing | Parallelize Python operations with multiple worker processes. This option could be beneficial if the Python operation is computationally heavy.                                                                                                        | bool            | False    |

Returns:

BatchDataset, dataset batched.
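As a usage sketch (not the official training script), the loader is typically built from a COCODataset together with the collate function and column names that the dataset object exposes. The collate-function name train_collate_fn and the one-entry transform list below are assumptions for illustration; in practice they come from your MindYOLO version and the model's YAML config.

```python
# A minimal usage sketch for create_loader (illustrative, not the official script).
from mindyolo.data.dataset import COCODataset
from mindyolo.data.loader import create_loader

dataset = COCODataset(
    dataset_path="./coco/train2017.txt",
    img_size=640,
    transforms_dict=[{"func_name": "letterbox", "scaleup": False}],  # illustrative pipeline
    is_training=True,
    batch_size=16,
)

loader = create_loader(
    dataset=dataset,
    batch_collate_fn=dataset.train_collate_fn,          # assumed attribute name
    column_names_getitem=dataset.column_names_getitem,  # ['samples']
    column_names_collate=dataset.column_names_collate,  # e.g. ['images', 'labels']
    batch_size=16,
    epoch_size=1,
    rank=0,         # shard id of this process
    rank_size=1,    # total number of shards (devices)
    shuffle=True,
    drop_remainder=True,
)

for batch in loader.create_dict_iterator(output_numpy=True, num_epochs=1):
    print(batch["images"].shape)  # one batched column per collate output
    break
```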
+ Source code in mindyolo/data/loader.py +
def create_loader(
+    dataset,
+    batch_collate_fn,
+    column_names_getitem,
+    column_names_collate,
+    batch_size,
+    epoch_size=1,
+    rank=0,
+    rank_size=1,
+    num_parallel_workers=8,
+    shuffle=True,
+    drop_remainder=False,
+    python_multiprocessing=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (COCODataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+    de.config.set_seed(1236517205 + rank)
+    cores = multiprocessing.cpu_count()
+    num_parallel_workers = min(int(cores / rank_size), num_parallel_workers)
+    logger.info(f"Dataloader num parallel workers: [{num_parallel_workers}]")
+    if rank_size > 1:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(8, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+            num_shards=rank_size,
+            shard_id=rank,
+        )
+    else:
+        ds = de.GeneratorDataset(
+            dataset,
+            column_names=column_names_getitem,
+            num_parallel_workers=min(32, num_parallel_workers),
+            shuffle=shuffle,
+            python_multiprocessing=python_multiprocessing,
+        )
+    ds = ds.batch(
+        batch_size, per_batch_map=batch_collate_fn,
+        input_columns=column_names_getitem, output_columns=column_names_collate, drop_remainder=drop_remainder
+    )
+    ds = ds.repeat(epoch_size)
+
+    return ds
+
Dataset

mindyolo.data.dataset.COCODataset

Load the COCO dataset (yolo format coco labels)

Parameters:

| Parameter    | Description                                                                                  | Type | Default |
| ------------ | -------------------------------------------------------------------------------------------- | ---- | ------- |
| dataset_path | dataset label directory for dataset, e.g. ./coco/train2017.txt                                | str  | ''      |
| transforms   | A list of image data enhancements that apply data enhancements on dataset objects in order.   | list |         |

Expected directory layout for dataset_path:

COCO_ROOT
├── train2017.txt
├── annotations
│   └── instances_train2017.json
├── images
│   └── train2017
│       ├── 000000000001.jpg
│       └── 000000000002.jpg
└── labels
    └── train2017
        ├── 000000000001.txt
        └── 000000000002.txt
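A rough sketch of driving the dataset directly is shown below, assuming the directory layout above; the transform list is again illustrative and would normally be taken from the train_transforms/test_transforms section of a model YAML config (see the loader sketch earlier for feeding the dataset into create_loader).

```python
# A rough sketch of indexing COCODataset directly (illustrative).
from mindyolo.data.dataset import COCODataset

dataset = COCODataset(
    dataset_path="./coco/train2017.txt",   # text file listing the image paths
    img_size=640,
    transforms_dict=[{"func_name": "letterbox", "scaleup": False}],
    is_training=False,
)

print(len(dataset))          # number of images found
sample = dataset[0]          # dict produced by __getitem__ after the transform pipeline
print(sample["img"].shape)   # HWC image
print(sample["bboxes"][:3])  # first bounding boxes of the image
```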
+ Source code in mindyolo/data/dataset.py +
class COCODataset:
+    """
+    Load the COCO dataset (yolo format coco labels)
+
+    Args:
+        dataset_path (str): dataset label directory for dataset.
+        for example:
+            COCO_ROOT
+                ├── train2017.txt
+                ├── annotations
+                │     └── instances_train2017.json
+                ├── images
+                │     └── train2017
+                │             ├── 000000000001.jpg
+                │             └── 000000000002.jpg
+                └── labels
+                      └── train2017
+                              ├── 000000000001.txt
+                              └── 000000000002.txt
+            dataset_path (str): ./coco/train2017.txt
+        transforms (list): A list of images data enhancements
+            that apply data enhancements on data set objects in order.
+    """
+
+    def __init__(
+        self,
+        dataset_path="",
+        img_size=640,
+        transforms_dict=None,
+        is_training=False,
+        augment=False,
+        rect=False,
+        single_cls=False,
+        batch_size=32,
+        stride=32,
+        num_cls=80,
+        pad=0.0,
+        return_segments=False,  # for segment
+        return_keypoints=False, # for keypoint
+        nkpt=0,                 # for keypoint
+        ndim=0                  # for keypoint
+    ):
+        # acceptable image suffixes
+        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']
+        self.cache_version = 0.2
+
+        self.return_segments = return_segments
+        self.return_keypoints = return_keypoints
+        assert not (return_segments and return_keypoints), 'Can not return both segments and keypoints.'
+
+        self.path = dataset_path
+        self.img_size = img_size
+        self.augment = augment
+        self.rect = rect
+        self.stride = stride
+        self.num_cls = num_cls
+        self.nkpt = nkpt
+        self.ndim = ndim
+        self.transforms_dict = transforms_dict
+        self.is_training = is_training
+
+        # set column names
+        self.column_names_getitem = ['samples']
+        if self.is_training:
+            self.column_names_collate = ['images', 'labels']
+            if self.return_segments:
+                self.column_names_collate = ['images', 'labels', 'masks']
+            elif self.return_keypoints:
+                self.column_names_collate = ['images', 'labels', 'keypoints']
+        else:
+            self.column_names_collate = ["images", "img_files", "hw_ori", "hw_scale", "pad"]
+
+        try:
+            f = []  # image files
+            for p in self.path if isinstance(self.path, list) else [self.path]:
+                p = Path(p)  # os-agnostic
+                if p.is_dir():  # dir
+                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
+                elif p.is_file():  # file
+                    with open(p, "r") as t:
+                        t = t.read().strip().splitlines()
+                        parent = str(p.parent) + os.sep
+                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
+                else:
+                    raise Exception(f"{p} does not exist")
+            self.img_files = sorted([x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in self.img_formats])
+            assert self.img_files, f"No images found"
+        except Exception as e:
+            raise Exception(f"Error loading data from {self.path}: {e}\n")
+
+        # Check cache
+        self.label_files = self._img2label_paths(self.img_files)  # labels
+        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix(".cache.npy")  # cached labels
+        if cache_path.is_file():
+            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
+            if cache["version"] == self.cache_version \
+                    and cache["hash"] == self._get_hash(self.label_files + self.img_files):
+                logger.info(f"Dataset Cache file hash/version check success.")
+                logger.info(f"Load dataset cache from [{cache_path}] success.")
+            else:
+                logger.info(f"Dataset cache file hash/version check fail.")
+                logger.info(f"Datset caching now...")
+                cache, exists = self.cache_labels(cache_path), False  # cache
+                logger.info(f"Dataset caching success.")
+        else:
+            logger.info(f"No dataset cache available, caching now...")
+            cache, exists = self.cache_labels(cache_path), False  # cache
+            logger.info(f"Dataset caching success.")
+
+        # Display cache
+        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupted, total
+        if exists:
+            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+            tqdm(None, desc=d, total=n, initial=n)  # display cache results
+        assert nf > 0 or not augment, f"No labels in {cache_path}. Can not train without labels."
+
+        # Read cache
+        cache.pop("hash")  # remove hash
+        cache.pop("version")  # remove version
+        self.labels = cache['labels']
+        self.img_files = [lb['im_file'] for lb in self.labels]  # update im_files
+
+        # Check if the dataset is all boxes or all segments
+        lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in self.labels)
+        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
+        if len_segments and len_boxes != len_segments:
+            print(
+                f'WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, '
+                f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. '
+                'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.')
+            for lb in self.labels:
+                lb['segments'] = []
+        if len_cls == 0:
+            raise ValueError(f'All labels empty in {cache_path}, can not start training without labels.')
+
+        if single_cls:
+            for x in self.labels:
+                x['cls'][:, 0] = 0
+
+        n = len(self.labels)  # number of images
+        bi = np.floor(np.arange(n) / batch_size).astype(np.int_)  # batch index
+        nb = bi[-1] + 1  # number of batches
+        self.batch = bi  # batch index of image
+
+        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
+        self.imgs, self.img_hw_ori, self.indices = None, None, range(n)
+
+        # Rectangular Train/Test
+        if self.rect:
+            # Sort by aspect ratio
+            s = self.img_shapes  # wh
+            ar = s[:, 1] / s[:, 0]  # aspect ratio
+            irect = ar.argsort()
+            self.img_files = [self.img_files[i] for i in irect]
+            self.label_files = [self.label_files[i] for i in irect]
+            self.labels = [self.labels[i] for i in irect]
+            self.img_shapes = s[irect]  # wh
+            ar = ar[irect]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int_) * stride
+
+        self.imgIds = [int(Path(im_file).stem) for im_file in self.img_files]
+
+    def cache_labels(self, path=Path("./labels.cache.npy")):
+        # Cache dataset labels, check images and read shapes
+        x = {'labels': []}  # dict
+        nm, nf, ne, nc, segments, keypoints = 0, 0, 0, 0, [], None  # number missing, found, empty, duplicate
+        pbar = tqdm(zip(self.img_files, self.label_files), desc="Scanning images", total=len(self.img_files))
+        if self.return_keypoints and (self.nkpt <= 0 or self.ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
+        for i, (im_file, lb_file) in enumerate(pbar):
+            try:
+                # verify images
+                im = Image.open(im_file)
+                im.verify()  # PIL verify
+                shape = self._exif_size(im)  # image size
+                segments = []  # instance segments
+                assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
+                assert im.format.lower() in self.img_formats, f"invalid image format {im.format}"
+
+                # verify labels
+                if os.path.isfile(lb_file):
+                    nf += 1  # label found
+                    with open(lb_file, "r") as f:
+                        lb = [x.split() for x in f.read().strip().splitlines()]
+                        if any([len(x) > 6 for x in lb]) and (not self.return_keypoints):  # is segment
+                            classes = np.array([x[0] for x in lb], dtype=np.float32)
+                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
+                            lb = np.concatenate(
+                                (classes.reshape(-1, 1), segments2boxes(segments)), 1
+                            )  # (cls, xywh)
+                        lb = np.array(lb, dtype=np.float32)
+                    nl = len(lb)
+                    if nl:
+                        if self.return_keypoints:
+                            assert lb.shape[1] == (5 + self.nkpt * self.ndim), \
+                                f'labels require {(5 + self.nkpt * self.ndim)} columns each'
+                            assert (lb[:, 5::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                            assert (lb[:, 6::self.ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
+                        else:
+                            assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
+                            assert (lb[:, 1:] <= 1).all(), \
+                                f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
+                            assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
+                        # All labels
+                        max_cls = int(lb[:, 0].max())  # max label count
+                        assert max_cls <= self.num_cls, \
+                            f'Label class {max_cls} exceeds dataset class count {self.num_cls}. ' \
+                            f'Possible class labels are 0-{self.num_cls - 1}'
+                        _, j = np.unique(lb, axis=0, return_index=True)
+                        if len(j) < nl:  # duplicate row check
+                            lb = lb[j]  # remove duplicates
+                            if segments:
+                                segments = [segments[x] for x in i]
+                            print(f'WARNING ⚠️ {im_file}: {nl - len(j)} duplicate labels removed')
+                    else:
+                        ne += 1  # label empty
+                        lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                            if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                else:
+                    nm += 1  # label missing
+                    lb = np.zeros((0, (5 + self.nkpt * self.ndim)), dtype=np.float32) \
+                        if self.return_keypoints else np.zeros((0, 5), dtype=np.float32)
+                if self.return_keypoints:
+                    keypoints = lb[:, 5:].reshape(-1, self.nkpt, self.ndim)
+                    if self.ndim == 2:
+                        kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
+                        kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
+                        kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
+                        keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
+                lb = lb[:, :5]
+                x['labels'].append(
+                    dict(
+                        im_file=im_file,
+                        cls=lb[:, 0:1],     # (n, 1)
+                        bboxes=lb[:, 1:],   # (n, 4)
+                        segments=segments,  # list of (mi, 2)
+                        keypoints=keypoints,
+                        bbox_format='xywhn',
+                        segment_format='polygon'
+                    )
+                )
+            except Exception as e:
+                nc += 1
+                print(f"WARNING: Ignoring corrupted image and/or label {im_file}: {e}")
+
+            pbar.desc = f"Scanning '{path.parent / path.stem}' images and labels... " \
+                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+        pbar.close()
+
+        if nf == 0:
+            print(f"WARNING: No labels found in {path}.")
+
+        x["hash"] = self._get_hash(self.label_files + self.img_files)
+        x["results"] = nf, nm, ne, nc, len(self.img_files)
+        x["version"] = self.cache_version  # cache version
+        np.save(path, x)  # save for next time
+        logger.info(f"New cache created: {path}")
+        return x
+
+    def __getitem__(self, index):
+        sample = self.get_sample(index)
+
+        for _i, ori_trans in enumerate(self.transforms_dict):
+            _trans = ori_trans.copy()
+            func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+            if func_name == 'copy_paste':
+                sample = self.copy_paste(sample, prob)
+            elif random.random() < prob:
+                if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                    self.albumentations = Albumentations(size=self.img_size, **_trans)
+                if func_name == "letterbox":
+                    new_shape = self.img_size if not self.rect else self.batch_shapes[self.batch[index]]
+                    sample = self.letterbox(sample, new_shape, **_trans)
+                else:
+                    sample = getattr(self, func_name)(sample, **_trans)
+
+        sample['img'] = np.ascontiguousarray(sample['img'])
+        return sample
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def get_sample(self, index):
+        """Get and return label information from the dataset."""
+        sample = deepcopy(self.labels[index])
+        if self.imgs is None:
+            path = self.img_files[index]
+            img = cv2.imread(path)  # BGR
+            assert img is not None, "Image Not Found " + path
+            h_ori, w_ori = img.shape[:2]  # orig hw
+            r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+            if r != 1:  # always resize down, only resize up if training with augmentation
+                interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+                img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+            sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+        else:
+            sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+        return sample
+
+    def mosaic(
+        self,
+        sample,
+        mosaic9_prob=0.0,
+        post_transform=None,
+    ):
+        segment_format = sample['segment_format']
+        bbox_format = sample['bbox_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        mosaic9_prob = min(1.0, max(mosaic9_prob, 0.0))
+        if random.random() < (1 - mosaic9_prob):
+            sample = self._mosaic4(sample)
+        else:
+            sample = self._mosaic9(sample)
+
+        if post_transform:
+            for _i, ori_trans in enumerate(post_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                sample = getattr(self, func_name)(sample, **_trans)
+
+        return sample
+
+    def _mosaic4(self, sample):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=3)  # 3 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # box and cls
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        sample['img'] = img4
+        sample['cls'] = classes4
+        sample['bboxes'] = bboxes4
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments4
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def _mosaic9(self, sample):
+        # loads images in a 9-mosaic
+        classes9, bboxes9, segments9 = [], [], []
+        mosaic_samples = [sample, ]
+        indices = random.choices(self.indices, k=8)  # 8 additional image indices
+
+        segments_is_list = isinstance(sample['segments'], list)
+        if segments_is_list:
+            mosaic_samples += [self.get_sample(i) for i in indices]
+        else:
+            mosaic_samples += [self.resample_segments(self.get_sample(i)) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+
+        for i, mosaic_sample in enumerate(mosaic_samples):
+            # Load image
+            img = mosaic_sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img9
+            if i == 0:  # center
+                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                h0, w0 = h, w
+                c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
+            elif i == 1:  # top
+                c = s, s - h, s + w, s
+            elif i == 2:  # top right
+                c = s + wp, s - h, s + wp + w, s
+            elif i == 3:  # right
+                c = s + w0, s, s + w0 + w, s + h
+            elif i == 4:  # bottom right
+                c = s + w0, s + hp, s + w0 + w, s + hp + h
+            elif i == 5:  # bottom
+                c = s + w0 - w, s + h0, s + w0, s + h0 + h
+            elif i == 6:  # bottom left
+                c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
+            elif i == 7:  # left
+                c = s - w, s + h0 - h, s, s + h0
+            elif i == 8:  # top left
+                c = s - w, s + h0 - hp - h, s, s + h0 - hp
+
+            padx, pady = c[:2]
+            x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords
+
+            # box and cls
+            assert mosaic_sample['bbox_format'] == 'xywhn'
+            cls, bboxes = mosaic_sample['cls'], mosaic_sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padx, pady)  # normalized xywh to pixel xyxy format
+            classes9.append(cls)
+            bboxes9.append(bboxes)
+
+            # seg
+            assert mosaic_sample['segment_format'] == 'polygon'
+            segments = mosaic_sample['segments']
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
+                segments9.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padx, pady)
+                segments9.append(segments)
+
+            # Image
+            img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
+            hp, wp = h, w  # height, width previous
+
+        # Offset
+        yc, xc = [int(random.uniform(0, s)) for _ in mosaic_border]  # mosaic center x, y
+        img9 = img9[yc: yc + 2 * s, xc: xc + 2 * s]
+
+        # Concat/clip labels
+        classes9 = np.concatenate(classes9, 0)
+        bboxes9 = np.concatenate(bboxes9, 0)
+        bboxes9[:, [0, 2]] -= xc
+        bboxes9[:, [1, 3]] -= yc
+        bboxes9 = bboxes9.clip(0, 2 * s)
+
+        if segments_is_list:
+            c = np.array([xc, yc])  # centers
+            segments9 = [x - c for x in segments9]
+            for x in segments9:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments9 = np.concatenate(segments9, 0)
+            segments9[..., 0] -= xc
+            segments9[..., 1] -= yc
+            segments9 = segments9.clip(0, 2 * s)
+
+        sample['img'] = img9
+        sample['cls'] = classes9
+        sample['bboxes'] = bboxes9
+        sample['bbox_format'] = 'ltrb'
+        sample['segments'] = segments9
+        sample['mosaic_border'] = mosaic_border
+
+        return sample
+
+    def resample_segments(self, sample, n=1000):
+        segment_format = sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format is should be polygon, but got {segment_format}'
+
+        segments = sample['segments']
+        if len(segments) > 0:
+            # Up-sample an (n,2) segment
+            for i, s in enumerate(segments):
+                s = np.concatenate((s, s[0:1, :]), axis=0)
+                x = np.linspace(0, len(s) - 1, n)
+                xp = np.arange(len(s))
+                segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T  # segment xy
+            segments = np.stack(segments, axis=0)
+        else:
+            segments = np.zeros((0, 1000, 2), dtype=np.float32)
+        sample['segments'] = segments
+        return sample
+
+    def copy_paste(self, sample, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        segments = sample['segments']
+
+        n = len(segments)
+        if probability and n:
+            h, w, _ = img.shape  # height, width, channels
+            im_new = np.zeros(img.shape, np.uint8)
+            for j in random.sample(range(n), k=round(probability * n)):
+                c, l, s = cls[j], bboxes[j], segments[j]
+                box = w - l[2], l[1], w - l[0], l[3]
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+                if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
+                    cls = np.concatenate((cls, [c]), 0)
+                    bboxes = np.concatenate((bboxes, [box]), 0)
+                    if isinstance(segments, list):
+                        segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+                    else:
+                        segments = np.concatenate((segments, [np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)]), 0)
+                    cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+
+            result = cv2.bitwise_and(src1=img, src2=im_new)
+            result = cv2.flip(result, 1)  # augment segments (flip left-right)
+            i = result > 0  # pixels to replace
+            img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+
+        sample['img'] = img
+        sample['cls'] = cls
+        sample['bboxes'] = bboxes
+        sample['segments'] = segments
+
+        return sample
+
+    def random_perspective(
+            self, sample, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0)
+    ):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        img = sample['img']
+        cls = sample['cls']
+        targets = sample['bboxes']
+        segments = sample['segments']
+        assert isinstance(segments, np.ndarray), f"segments type expect numpy.ndarray, but got {type(segments)}; " \
+                                                 f"maybe you should resample_segments before that."
+
+        border = sample.pop('mosaic_border', border)
+        height = img.shape[0] + border[0] * 2  # shape(h,w,c)
+        width = img.shape[1] + border[1] * 2
+
+        # Center
+        C = np.eye(3)
+        C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
+        C[1, 2] = -img.shape[0] / 2  # y translation (pixels)
+
+        # Perspective
+        P = np.eye(3)
+        P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
+        P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)
+
+        # Rotation and Scale
+        R = np.eye(3)
+        a = random.uniform(-degrees, degrees)
+        s = random.uniform(1 - scale, 1 + scale)
+        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+        # Shear
+        S = np.eye(3)
+        S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
+        S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)
+
+        # Translation
+        T = np.eye(3)
+        T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
+        T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)
+
+        # Combined rotation matrix
+        M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
+        if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
+            if perspective:
+                img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
+            else:  # affine
+                img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+        # Transform label coordinates
+        n = len(targets)
+        if n:
+            use_segments = len(segments)
+            new_bboxes = np.zeros((n, 4))
+            if use_segments:  # warp segments
+                point_num = segments[0].shape[0]
+                new_segments = np.zeros((n, point_num, 2))
+                for i, segment in enumerate(segments):
+                    xy = np.ones((len(segment), 3))
+                    xy[:, :2] = segment
+                    xy = xy @ M.T  # transform
+                    xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine
+
+                    # clip
+                    new_segments[i] = xy
+                    new_bboxes[i] = segment2box(xy, width, height)
+
+            else:  # warp boxes
+                xy = np.ones((n * 4, 3))
+                xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+                xy = xy @ M.T  # transform
+                xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine
+
+                # create new boxes
+                x = xy[:, [0, 2, 4, 6]]
+                y = xy[:, [1, 3, 5, 7]]
+                new_bboxes = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+                # clip
+                new_bboxes[:, [0, 2]] = new_bboxes[:, [0, 2]].clip(0, width)
+                new_bboxes[:, [1, 3]] = new_bboxes[:, [1, 3]].clip(0, height)
+
+            # filter candidates
+            i = box_candidates(box1=targets.T * s, box2=new_bboxes.T, area_thr=0.01 if use_segments else 0.10)
+
+            cls = cls[i]
+            targets = new_bboxes[i]
+            sample['cls'] = cls
+            sample['bboxes'] = targets
+            if use_segments:
+                sample['segments'] = segments[i]
+
+        sample['img'] = img
+
+        return sample
+
+    def mixup(self, sample, alpha: 32.0, beta: 32.0, pre_transform=None):
+        bbox_format, segment_format = sample['bbox_format'], sample['segment_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+        index = random.choices(self.indices, k=1)[0]
+        sample2 = self.get_sample(index)
+        if pre_transform:
+            for _i, ori_trans in enumerate(pre_transform):
+                _trans = ori_trans.copy()
+                func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0)
+                if func_name == 'copy_paste':
+                    sample2 = self.copy_paste(sample2, prob)
+                elif random.random() < prob:
+                    if func_name == "albumentations" and getattr(self, "albumentations", None) is None:
+                        self.albumentations = Albumentations(size=self.img_size, **_trans)
+                    sample2 = getattr(self, func_name)(sample2, **_trans)
+
+        assert isinstance(sample['segments'], np.ndarray), \
+            f"MixUp: sample segments type expect numpy.ndarray, but got {type(sample['segments'])}; " \
+            f"maybe you should resample_segments before that."
+        assert isinstance(sample2['segments'], np.ndarray), \
+            f"MixUp: sample2 segments type expect numpy.ndarray, but got {type(sample2['segments'])}; " \
+            f"maybe you should add resample_segments in pre_transform."
+
+        image, image2 = sample['img'], sample2['img']
+        r = np.random.beta(alpha, beta)  # mixup ratio sampled from Beta(alpha, beta)
+        image = (image * r + image2 * (1 - r)).astype(np.uint8)
+
+        sample['img'] = image
+        sample['cls'] = np.concatenate((sample['cls'], sample2['cls']), 0)
+        sample['bboxes'] = np.concatenate((sample['bboxes'], sample2['bboxes']), 0)
+        sample['segments'] = np.concatenate((sample['segments'], sample2['segments']), 0)
+        return sample
+
+    def pastein(self, sample, num_sample=30):
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'ltrb', f'The bbox format should be ltrb, but got {bbox_format}'
+        assert not self.return_segments, "pastein currently does not support seg data."
+        assert not self.return_keypoints, "pastein currently does not support keypoint data."
+        sample.pop('segments', None)
+        sample.pop('keypoints', None)
+
+        image = sample['img']
+        cls = sample['cls']
+        bboxes = sample['bboxes']
+        # load sample
+        sample_labels, sample_images, sample_masks = [], [], []
+        while len(sample_labels) < num_sample:
+            sample_labels_, sample_images_, sample_masks_ = self._pastin_load_samples()
+            sample_labels += sample_labels_
+            sample_images += sample_images_
+            sample_masks += sample_masks_
+            if len(sample_labels) == 0:
+                break
+
+        # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
+        h, w = image.shape[:2]
+
+        # create random masks
+        scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6  # image size fraction
+        for s in scales:
+            if random.random() < 0.2:
+                continue
+            mask_h = random.randint(1, int(h * s))
+            mask_w = random.randint(1, int(w * s))
+
+            # box
+            xmin = max(0, random.randint(0, w) - mask_w // 2)
+            ymin = max(0, random.randint(0, h) - mask_h // 2)
+            xmax = min(w, xmin + mask_w)
+            ymax = min(h, ymin + mask_h)
+
+            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+            if len(bboxes):
+                ioa = bbox_ioa(box, bboxes)  # intersection over area
+            else:
+                ioa = np.zeros(1)
+
+            if (
+                    (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin + 20) and (ymax > ymin + 20)
+            ):  # allow 30% obscuration of existing labels
+                sel_ind = random.randint(0, len(sample_labels) - 1)
+                hs, ws, cs = sample_images[sel_ind].shape
+                r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws)
+                r_w = int(ws * r_scale)
+                r_h = int(hs * r_scale)
+
+                if (r_w > 10) and (r_h > 10):
+                    r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))
+                    r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))
+                    temp_crop = image[ymin: ymin + r_h, xmin: xmin + r_w]
+                    m_ind = r_mask > 0
+                    if m_ind.astype(np.int_).sum() > 60:
+                        temp_crop[m_ind] = r_image[m_ind]
+                        box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], dtype=np.float32)
+                        if len(bboxes):
+                            cls = np.concatenate((cls, [[sample_labels[sel_ind]]]), 0)
+                            bboxes = np.concatenate((bboxes, [box]), 0)
+                        else:
+                            cls = np.array([[sample_labels[sel_ind]]])
+                            bboxes = np.array([box])
+
+                        image[ymin: ymin + r_h, xmin: xmin + r_w] = temp_crop  # Modify on the original image
+
+        sample['img'] = image
+        sample['bboxes'] = bboxes
+        sample['cls'] = cls
+        return sample
+
+    def _pastin_load_samples(self):
+        # loads images in a 4-mosaic
+        classes4, bboxes4, segments4 = [], [], []
+        mosaic_samples = []
+        indices = random.choices(self.indices, k=4)  # 4 image indices for the 4-mosaic
+        mosaic_samples += [self.get_sample(i) for i in indices]
+        s = self.img_size
+        mosaic_border = [-s // 2, -s // 2]
+        yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]  # mosaic center x, y
+
+        for i, sample in enumerate(mosaic_samples):
+            # Load image
+            img = sample['img']
+            (h, w) = img.shape[:2]
+
+            # place img in img4
+            if i == 0:  # top left
+                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+            elif i == 1:  # top right
+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+            elif i == 2:  # bottom left
+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+            elif i == 3:  # bottom right
+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+            padw = x1a - x1b
+            padh = y1a - y1b
+
+            # Labels
+            cls, bboxes = sample['cls'], sample['bboxes']
+            bboxes = xywhn2xyxy(bboxes, w, h, padw, padh)  # normalized xywh to pixel xyxy format
+
+            classes4.append(cls)
+            bboxes4.append(bboxes)
+
+            segments = sample['segments']
+            segments_is_list = isinstance(segments, list)
+            if segments_is_list:
+                segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
+                segments4.extend(segments)
+            else:
+                segments = xyn2xy(segments, w, h, padw, padh)
+                segments4.append(segments)
+
+        # Concat/clip labels
+        classes4 = np.concatenate(classes4, 0)
+        bboxes4 = np.concatenate(bboxes4, 0)
+        bboxes4 = bboxes4.clip(0, 2 * s)
+
+        if segments_is_list:
+            for x in segments4:
+                np.clip(x, 0, 2 * s, out=x)
+        else:
+            segments4 = np.concatenate(segments4, 0)
+            segments4 = segments4.clip(0, 2 * s)
+
+        # Augment
+        sample_labels, sample_images, sample_masks = \
+            self._pastin_sample_segments(img4, classes4, bboxes4, segments4, probability=0.5)
+
+        return sample_labels, sample_images, sample_masks
+
+    def _pastin_sample_segments(self, img, classes, bboxes, segments, probability=0.5):
+        # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+        n = len(segments)
+        sample_labels = []
+        sample_images = []
+        sample_masks = []
+        if probability and n:
+            h, w, c = img.shape  # height, width, channels
+            for j in random.sample(range(n), k=round(probability * n)):
+                cls, l, s = classes[j], bboxes[j], segments[j]
+                box = (
+                    l[0].astype(int).clip(0, w - 1),
+                    l[1].astype(int).clip(0, h - 1),
+                    l[2].astype(int).clip(0, w - 1),
+                    l[3].astype(int).clip(0, h - 1),
+                )
+
+                if (box[2] <= box[0]) or (box[3] <= box[1]):
+                    continue
+
+                sample_labels.append(cls[0])
+
+                mask = np.zeros(img.shape, np.uint8)
+
+                cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
+                sample_masks.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+                result = cv2.bitwise_and(src1=img, src2=mask)
+                i = result > 0  # pixels to replace
+                mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
+                sample_images.append(mask[box[1]: box[3], box[0]: box[2], :])
+
+        return sample_labels, sample_images, sample_masks
+
+    def hsv_augment(self, sample, hgain=0.5, sgain=0.5, vgain=0.5):
+        image = sample['img']
+        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
+        hue, sat, val = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
+        dtype = image.dtype  # uint8
+
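+        # build one 256-entry lookup table per channel so the random gains are applied with a single cv2.LUT call each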
+        x = np.arange(0, 256, dtype=np.int16)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
+        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)  # Modify on the original image
+
+        sample['img'] = image
+        return sample
+
+    def fliplr(self, sample):
+        # flip image left-right
+        image = sample['img']
+        image = np.fliplr(image)
+        sample['img'] = image
+
+        # flip box
+        _, w = image.shape[:2]
+        bboxes, bbox_format = sample['bboxes'], sample['bbox_format']
+        if bbox_format == "ltrb":
+            if len(bboxes):
+                x1 = bboxes[:, 0].copy()
+                x2 = bboxes[:, 2].copy()
+                bboxes[:, 0] = w - x2
+                bboxes[:, 2] = w - x1
+        elif bbox_format == "xywhn":
+            if len(bboxes):
+                bboxes[:, 0] = 1 - bboxes[:, 0]
+        else:
+            raise NotImplementedError
+        sample['bboxes'] = bboxes
+
+        # flip seg
+        if self.return_segments:
+            segment_format, segments = sample['segment_format'], sample['segments']
+            assert segment_format == 'polygon', \
+                f'FlipLR: The segment format should be polygon, but got {segment_format}'
+            assert isinstance(segments, np.ndarray), \
+                f"FlipLR: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                f"maybe you should resample_segments before that."
+
+            if len(segments):
+                segments[..., 0] = w - segments[..., 0]
+
+            sample['segments'] = segments
+
+        return sample
+
+    def letterbox(self, sample, new_shape=None, xywhn2xyxy_=True, scaleup=False, only_image=False, color=(114, 114, 114)):
+        # Resize and pad image while meeting stride-multiple constraints
+        if sample['bbox_format'] == 'ltrb':
+            xywhn2xyxy_ = False
+
+        if not new_shape:
+            new_shape = self.img_size
+
+        if isinstance(new_shape, int):
+            new_shape = (new_shape, new_shape)
+
+        image = sample['img']
+        shape = image.shape[:2]  # current shape [height, width]
+
+        h, w = shape[:]
+        ori_shape = sample['ori_shape']
+        h0, w0 = ori_shape
+        hw_scale = np.array([h / h0, w / w0])
+        sample['hw_scale'] = hw_scale
+
+        # Scale ratio (new / old)
+        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+        if not scaleup:  # only scale down, do not scale up (for better test mAP)
+            r = min(r, 1.0)
+
+        # Compute padding
+        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+        dw /= 2  # divide padding into 2 sides
+        dh /= 2
+        hw_pad = np.array([dh, dw])
+
+        if shape != new_shape:
+            if shape[::-1] != new_unpad:  # resize
+                image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
+            top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+            image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+            sample['hw_pad'] = hw_pad
+        else:
+            sample['hw_pad'] = np.array([0., 0.])
+        bboxes = sample['bboxes']
+        if not only_image:
+            # convert bboxes
+            if len(bboxes):
+                if xywhn2xyxy_:
+                    bboxes = xywhn2xyxy(bboxes, r * w, r * h, padw=dw, padh=dh)
+                else:
+                    bboxes *= r
+                    bboxes[:, [0, 2]] += dw
+                    bboxes[:, [1, 3]] += dh
+                sample['bboxes'] = bboxes
+            sample['bbox_format'] = 'ltrb'
+
+            # convert segments
+            if 'segments' in sample:
+                segments, segment_format = sample['segments'], sample['segment_format']
+                assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+
+                if len(segments):
+                    if isinstance(segments, np.ndarray):
+                        if xywhn2xyxy_:
+                            segments[..., 0] *= w
+                            segments[..., 1] *= h
+                        else:
+                            segments *= r
+                        segments[..., 0] += dw
+                        segments[..., 1] += dh
+                    elif isinstance(segments, list):
+                        for segment in segments:
+                            if xywhn2xyxy_:
+                                segment[..., 0] *= w
+                                segment[..., 1] *= h
+                            else:
+                                segment *= r
+                            segment[..., 0] += dw
+                            segment[..., 1] += dh
+                    sample['segments'] = segments
+
+        sample['img'] = image
+        return sample
+
+    def label_norm(self, sample, xyxy2xywh_=True):
+        bbox_format = sample['bbox_format']
+        if bbox_format == "xywhn":
+            return sample
+
+        bboxes = sample['bboxes']
+        if len(bboxes) == 0:
+            sample['bbox_format'] = 'xywhn'
+            return sample
+
+        if xyxy2xywh_:
+            bboxes = xyxy2xywh(bboxes)  # convert xyxy to xywh
+        height, width = sample['img'].shape[:2]
+        bboxes[:, [1, 3]] /= height  # normalized height 0-1
+        bboxes[:, [0, 2]] /= width  # normalized width 0-1
+        sample['bboxes'] = bboxes
+        sample['bbox_format'] = 'xywhn'
+
+        return sample
+
+    def label_pad(self, sample, padding_size=160, padding_value=-1):
+        # create fixed label, avoid dynamic shape problem.
+        bbox_format = sample['bbox_format']
+        assert bbox_format == 'xywhn', f'The bbox format should be xywhn, but got {bbox_format}'
+
+        cls, bboxes = sample['cls'], sample['bboxes']
+        cls_pad = np.full((padding_size, 1), padding_value, dtype=np.float32)
+        bboxes_pad = np.full((padding_size, 4), padding_value, dtype=np.float32)
+        nL = len(bboxes)
+        if nL:
+            cls_pad[:min(nL, padding_size)] = cls[:min(nL, padding_size)]
+            bboxes_pad[:min(nL, padding_size)] = bboxes[:min(nL, padding_size)]
+        sample['cls'] = cls_pad
+        sample['bboxes'] = bboxes_pad
+
+        if "segments" in sample:
+            if sample['segment_format'] == "mask":
+                segments = sample['segments']
+                assert isinstance(segments, np.ndarray), \
+                    f"Label Pad: segments type expect numpy.ndarray, but got {type(segments)}; " \
+                    f"maybe you should resample_segments before that."
+                assert nL == segments.shape[0], f"Label Pad: segments len not equal bboxes"
+                h, w = segments.shape[1:]
+                segments_pad = np.full((padding_size, h, w), padding_value, dtype=np.float32)
+                segments_pad[:min(nL, padding_size)] = segments[:min(nL, padding_size)]
+                sample['segments'] = segments_pad
+
+        return sample
+
+    def image_norm(self, sample, scale=255.0):
+        image = sample['img']
+        image = image.astype(np.float32, copy=False)
+        image /= scale
+        sample['img'] = image
+        return sample
+
+    def image_transpose(self, sample, bgr2rgb=True, hwc2chw=True):
+        image = sample['img']
+        if bgr2rgb:
+            image = image[:, :, ::-1]
+        if hwc2chw:
+            image = image.transpose(2, 0, 1)
+        sample['img'] = image
+        return sample
+
+    def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+        """convert polygon points to bitmap."""
+        segments, segment_format = sample['segments'], sample['segment_format']
+        assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+        assert isinstance(segments, np.ndarray), \
+            f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+            f"maybe you should resample_segments before that."
+
+        h, w = sample['img'].shape[:2]
+        if mask_overlap:
+            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+            sample['cls'] = sample['cls'][sorted_idx]
+            sample['bboxes'] = sample['bboxes'][sorted_idx]
+            sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+            sample['segment_format'] = 'overlap'
+        else:
+            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+            sample['segments'] = masks
+            sample['segment_format'] = 'mask'
+
+        return sample
+
+    def _img2label_paths(self, img_paths):
+        # Define label paths as a function of image paths
+        sa, sb = os.sep + "images" + os.sep, os.sep + "labels" + os.sep  # /images/, /labels/ substrings
+        return ["txt".join(x.replace(sa, sb, 1).rsplit(x.split(".")[-1], 1)) for x in img_paths]
+
+    def _get_hash(self, paths):
+        # Returns a single hash value of a list of paths (files or dirs)
+        size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+        h = hashlib.md5(str(size).encode())  # hash sizes
+        h.update("".join(paths).encode())  # hash paths
+        return h.hexdigest()  # return hash
+
+    def _exif_size(self, img):
+        # Returns exif-corrected PIL size
+        s = img.size  # (width, height)
+        try:
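+            # `orientation` is the EXIF orientation tag id (typically resolved from PIL.ExifTags at module level)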
+            rotation = dict(img._getexif().items())[orientation]
+            if rotation == 6:  # rotation 270
+                s = (s[1], s[0])
+            elif rotation == 8:  # rotation 90
+                s = (s[1], s[0])
+        except Exception:  # image has no usable EXIF orientation data
+            pass
+
+        return s
+
+    def train_collate_fn(self, batch_samples, batch_info):
+        imgs = [sample.pop('img') for sample in batch_samples]
+        labels = []
+        for i, sample in enumerate(batch_samples):
+            cls, bboxes = sample.pop('cls'), sample.pop('bboxes')
+            labels.append(np.concatenate((np.full_like(cls, i), cls, bboxes), axis=-1))
+        return_items = [np.stack(imgs, 0), np.stack(labels, 0)]
+
+        if self.return_segments:
+            masks = [sample.pop('segments', None) for sample in batch_samples]
+            return_items.append(np.stack(masks, 0))
+        if self.return_keypoints:
+            keypoints = [sample.pop('keypoints', None) for sample in batch_samples]
+            return_items.append(np.stack(keypoints, 0))
+
+        return tuple(return_items)
+
+    def test_collate_fn(self, batch_samples, batch_info):
+        imgs = [sample.pop('img') for sample in batch_samples]
+        path = [sample.pop('im_file') for sample in batch_samples]
+        hw_ori = [sample.pop('ori_shape') for sample in batch_samples]
+        hw_scale = [sample.pop('hw_scale') for sample in batch_samples]
+        pad = [sample.pop('hw_pad') for sample in batch_samples]
+        return (
+            np.stack(imgs, 0),
+            path,
+            np.stack(hw_ori, 0),
+            np.stack(hw_scale, 0),
+            np.stack(pad, 0),
+        )
+
+
+ + + +
+ + + + + + + + + +
+ + +

+ mindyolo.data.dataset.COCODataset.get_sample(index) + +

+ + +
+ +

Get and return label information from the dataset.

+ +
+ Source code in mindyolo/data/dataset.py +
def get_sample(self, index):
+    """Get and return label information from the dataset."""
+    sample = deepcopy(self.labels[index])
+    if self.imgs is None:
+        path = self.img_files[index]
+        img = cv2.imread(path)  # BGR
+        assert img is not None, "Image Not Found " + path
+        h_ori, w_ori = img.shape[:2]  # orig hw
+        r = self.img_size / max(h_ori, w_ori)  # resize image to img_size
+        if r != 1:  # always resize down, only resize up if training with augmentation
+            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
+            img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp)
+
+        sample['img'], sample['ori_shape'] = img, np.array([h_ori, w_ori])  # img, hw_original
+
+    else:
+        sample['img'], sample['ori_shape'] = self.imgs[index], self.img_hw_ori[index]  # img, hw_original
+
+    return sample
+
+
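A minimal usage sketch (the `dataset` argument is assumed to be an already-constructed COCODataset instance; the keys follow the source above):
+def show_first_sample(dataset):
+    sample = dataset.get_sample(0)
+    print(sample['im_file'], sample['ori_shape'])  # image path and original (h, w)
+    print(sample['img'].shape)                     # BGR image, resized so the long side fits img_size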
+
+ +
+ +
+ + +

+ mindyolo.data.dataset.COCODataset.segment_poly2mask(sample, mask_overlap, mask_ratio) + +

+ + +
+ +

convert polygon points to bitmap.

+ +
+ Source code in mindyolo/data/dataset.py +
def segment_poly2mask(self, sample, mask_overlap, mask_ratio):
+    """convert polygon points to bitmap."""
+    segments, segment_format = sample['segments'], sample['segment_format']
+    assert segment_format == 'polygon', f'The segment format should be polygon, but got {segment_format}'
+    assert isinstance(segments, np.ndarray), \
+        f"Segment Poly2Mask: segments type expect numpy.ndarray, but got {type(segments)}; " \
+        f"maybe you should resample_segments before that."
+
+    h, w = sample['img'].shape[:2]
+    if mask_overlap:
+        masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=mask_ratio)
+        sample['cls'] = sample['cls'][sorted_idx]
+        sample['bboxes'] = sample['bboxes'][sorted_idx]
+        sample['segments'] = masks  # (h/mask_ratio, w/mask_ratio)
+        sample['segment_format'] = 'overlap'
+    else:
+        masks = polygons2masks((h, w), segments, color=1, downsample_ratio=mask_ratio)
+        sample['segments'] = masks
+        sample['segment_format'] = 'mask'
+
+    return sample
+
+
+
+ +
+ + + +
+ +
+ +

Albumentations

+ + +
+ + + +

+ mindyolo.data.albumentations.Albumentations + + +

+ + +
+ + +
+ Source code in mindyolo/data/albumentations.py +
class Albumentations:
+    # Implement Albumentations augmentation https://github.com/ultralytics/yolov5
+    # YOLOv5 Albumentations class (optional, only used if package is installed)
+    def __init__(self, size=640, random_resized_crop=True, **kwargs):
+        self.transform = None
+        prefix = _colorstr("albumentations: ")
+        try:
+            import albumentations as A
+
+            _check_version(A.__version__, "1.0.3", hard=True)  # version requirement
+            T = []
+            if random_resized_crop:
+                T.extend([
+                    A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
+                ])
+            T.extend([
+                A.Blur(p=0.01),
+                A.MedianBlur(p=0.01),
+                A.ToGray(p=0.01),
+                A.CLAHE(p=0.01),
+                A.RandomBrightnessContrast(p=0.0),
+                A.RandomGamma(p=0.0),
+                A.ImageCompression(quality_lower=75, p=0.0),
+            ])
+            self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
+
+            print(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p), flush=True)
+            print("[INFO] albumentations load success", flush=True)
+        except ImportError:  # package not installed, skip
+            pass
+            print("[WARNING] package not installed, albumentations load failed", flush=True)
+        except Exception as e:
+            print(f"{prefix}{e}", flush=True)
+            print("[WARNING] albumentations load failed", flush=True)
+
+    def __call__(self, sample, p=1.0, **kwargs):
+        if self.transform and random.random() < p:
+            im, bboxes, cls, bbox_format = sample['img'], sample['bboxes'], sample['cls'], sample['bbox_format']
+            assert bbox_format in ("ltrb", "xywhn")
+            if bbox_format == "ltrb" and bboxes.shape[0] > 0:
+                h, w = im.shape[:2]
+                bboxes = xyxy2xywh(bboxes)
+                bboxes[:, [0, 2]] /= w
+                bboxes[:, [1, 3]] /= h
+
+            new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
+
+            sample['img'] = new['image']
+            sample['bboxes'] = np.array(new['bboxes'])
+            sample['cls'] = np.array(new['class_labels'])
+            sample['bbox_format'] = "xywhn"
+
+        return sample
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/models/index.html b/zh/reference/models/index.html new file mode 100644 index 00000000..7c9067ae --- /dev/null +++ b/zh/reference/models/index.html @@ -0,0 +1,1709 @@ + + + + + + + + + + + + + + + + + + + + + + + + 模型 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

模型

+

创建模型

+ + +
+ + +

+ mindyolo.models.model_factory.create_model(model_name, model_cfg=None, in_channels=3, num_classes=80, checkpoint_path='', **kwargs) + +

+ + +
+ +
+ Source code in mindyolo/models/model_factory.py +
def create_model(
+    model_name: str,
+    model_cfg: dict = None,
+    in_channels: int = 3,
+    num_classes: int = 80,
+    checkpoint_path: str = "",
+    **kwargs,
+):
+    model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith(
+            ".ckpt"
+        ), f"[{checkpoint_path}] not a ckpt file."
+        checkpoint_param = load_checkpoint(checkpoint_path)
+        load_param_into_net(model, checkpoint_param)
+        logger.info(f"Load checkpoint from [{checkpoint_path}] success.")
+
+    return model
+
+
+
+ +
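下面给出一个调用 create_model 的简化示意(仅供参考,非套件源码):其中 network_cfg 代表解析模型 yaml 后得到的 network 配置对象(例如训练脚本中的 cfg.network),需由使用者自行构造。
+from mindyolo.models.model_factory import create_model
+
+def build_registered_model(network_cfg, num_classes=80, checkpoint_path=""):
+    """network_cfg 为解析模型 yaml 后得到的 network 配置对象(示例假设)。"""
+    return create_model(
+        model_name=network_cfg.model_name,   # 需为已通过 @register_model 注册的模型名,如 "yolov5"
+        model_cfg=network_cfg,
+        in_channels=3,
+        num_classes=num_classes,
+        checkpoint_path=checkpoint_path,     # 非空时会加载对应的 .ckpt 权重
+    )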

YOLOV3

+ + +
+ + +

+ mindyolo.models.yolov3(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolov3 model.

+ +
+ Source code in mindyolo/models/yolov3.py +
@register_model
+def yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3:
+    """Get yolov3 model."""
+    model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV4

+ + +
+ + +

+ mindyolo.models.yolov4(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolov4 model.

+ +
+ Source code in mindyolo/models/yolov4.py +
@register_model
+def yolov4(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv4:
+    """Get yolov4 model."""
+    model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV5

+ + +
+ + +

+ mindyolo.models.yolov5(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolov5 model.

+ +
+ Source code in mindyolo/models/yolov5.py +
@register_model
+def yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5:
+    """Get yolov5 model."""
+    model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV7

+ + +
+ + +

+ mindyolo.models.yolov7(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolov7 model.

+ +
+ Source code in mindyolo/models/yolov7.py +
@register_model
+def yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7:
+    """Get yolov7 model."""
+    model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOV8

+ + +
+ + +

+ mindyolo.models.yolov8(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolov8 model.

+ +
+ Source code in mindyolo/models/yolov8.py +
@register_model
+def yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8:
+    """Get yolov8 model."""
+    model = YOLOv8(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +

YOLOX

+ + +
+ + +

+ mindyolo.models.yolox(cfg, in_channels=3, num_classes=None, **kwargs) + +

+ + +
+ +

Get yolox model.

+ +
+ Source code in mindyolo/models/yolox.py +
@register_model
+def yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX:
+    """Get yolox model."""
+    model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return model
+
+
+
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/configuration/index.html b/zh/tutorials/configuration/index.html new file mode 100644 index 00000000..8f226646 --- /dev/null +++ b/zh/tutorials/configuration/index.html @@ -0,0 +1,1730 @@ + + + + + + + + + + + + + + + + + + + + + + + + 配置 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

配置

+

MindYOLO套件同时支持yaml文件参数和命令行参数解析:相对固定、与模型强相关、较为复杂或含有嵌套结构的参数编写在yaml文件中;需根据实际应用场景修改或较为简单的参数则通过命令行传入。

+

下面以yolov3为例,解释如何配置相应的参数。

+

参数继承关系

+

参数优先级由高到低如下,出现同名参数时,低优先级参数会被高优先级参数覆盖

+
    +
  • 用户命令行传入参数
  • +
  • python执行py文件中parser的默认参数
  • +
  • 命令行传入config参数对应的yaml文件参数
  • +
  • 命令行传入config参数对应的yaml文件中__BASE__参数中包含的yaml文件参数,例如yolov3.yaml含有如下参数: +
    __BASE__: [
    +  '../coco.yaml',
    +  './hyp.scratch.yaml',
    +]
    +
  • +
+

基础参数

+

参数说明

+
    +
  • device_target: 所用设备,Ascend/GPU/CPU
  • +
  • save_dir: 运行结果保存路径,默认为./runs
  • +
  • log_interval: 打印日志step间隔,默认为100
  • +
  • is_parallel: 是否分布式训练,默认为False
  • +
  • ms_mode: 使用静态图模式(0)或动态图模式(1),默认为0。
  • +
  • config: yaml配置文件路径
  • +
  • per_batch_size: 每张卡batch size,默认为32
  • +
  • epochs: 训练epoch数,默认为300
  • +
  • ...
  • +
+

parse参数设置

+

该部分参数通常由命令行传入,示例如下:

+
mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml  --is_parallel True --log_interval 50
+
+

数据集

+

参数说明

+
    +
  • dataset_name: 数据集名称
  • +
  • train_set: 训练集所在路径
  • +
  • val_set: 验证集所在路径
  • +
  • test_set: 测试集所在路径
  • +
  • nc: 数据集类别数
  • +
  • names: 类别名称
  • +
  • ...
  • +
+

yaml文件样例

+

该部分参数在configs/coco.yaml中定义,通常需修改其中的数据集路径

+
data:
+ dataset_name: coco
+
+ train_set: ./coco/train2017.txt  # 118287 images
+ val_set: ./coco/val2017.txt  # 5000 images
+ test_set: ./coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+ nc: 80
+
+ # class names
+ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+          'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+          'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+          'hair drier', 'toothbrush' ]
+
+

数据增强

+

参数说明

+
    +
  • num_parallel_workers: 读取数据的工作进程数
  • +
  • train_transforms: 训练过程数据增强
  • +
  • test_transforms: 验证过程数据增强
  • +
  • ...
  • +
+

yaml文件样例

+

该部分参数在configs/yolov3/hyp.scratch.yaml中定义,其中train_transforms和test_transforms均为由字典组成的列表,各字典包含数据增强操作名称、发生概率及该增强方法相关的参数

+
data:
+  num_parallel_workers: 4
+
+  train_transforms:
+    - { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.9 }
+    - { func_name: mixup, prob: 0.1, alpha: 8.0, beta: 8.0, needed_mosaic: True }
+    - { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: albumentations }
+    - { func_name: fliplr, prob: 0.5 }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+  test_transforms:
+    - { func_name: letterbox, scaleup: False }
+    - { func_name: label_norm, xyxy2xywh_: True }
+    - { func_name: label_pad, padding_size: 160, padding_value: -1 }
+    - { func_name: image_norm, scale: 255. }
+    - { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }
+
+

模型

+

参数说明

+
    +
  • model_name: 模型名称
  • +
  • depth_multiple: 模型深度因子
  • +
  • width_multiple: 模型宽度因子
  • +
  • stride: 特征图下采样倍数
  • +
  • anchors: 预设锚框
  • +
  • backbone: 模型骨干网络
  • +
  • head: 模型检测头
  • +
+

yaml文件样例

+

该部分参数在configs/yolov3/yolov3.yaml中定义,根据backbone和head参数进行网络构建,参数以嵌套列表的形式呈现,每行代表一层模块,包含4个参数,分别是输入层编号(-1代表上一层)、模块重复次数、模块名称和模块相应参数。用户也可以不借助yaml文件而直接在py文件中定义和注册网络(示例见本节yaml之后的代码片段)。

+
network:
+  model_name: yolov3
+
+  depth_multiple: 1.0  # model depth multiple
+  width_multiple: 1.0  # layer channel multiple
+  stride: [8, 16, 32]
+  anchors:
+    - [10,13, 16,30, 33,23]  # P3/8
+    - [30,61, 62,45, 59,119]  # P4/16
+    - [116,90, 156,198, 373,326]  # P5/32
+
+  # darknet53 backbone
+  backbone:
+    # [from, number, module, args]
+    [[-1, 1, ConvNormAct, [32, 3, 1]],  # 0
+     [-1, 1, ConvNormAct, [64, 3, 2]],  # 1-P1/2
+     [-1, 1, Bottleneck, [64]],
+     [-1, 1, ConvNormAct, [128, 3, 2]],  # 3-P2/4
+     [-1, 2, Bottleneck, [128]],
+     [-1, 1, ConvNormAct, [256, 3, 2]],  # 5-P3/8
+     [-1, 8, Bottleneck, [256]],
+     [-1, 1, ConvNormAct, [512, 3, 2]],  # 7-P4/16
+     [-1, 8, Bottleneck, [512]],
+     [-1, 1, ConvNormAct, [1024, 3, 2]],  # 9-P5/32
+     [-1, 4, Bottleneck, [1024]],  # 10
+    ]
+
+  # YOLOv3 head
+  head:
+    [[-1, 1, Bottleneck, [1024, False]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],
+     [-1, 1, ConvNormAct, [512, 1, 1]],
+     [-1, 1, ConvNormAct, [1024, 3, 1]],  # 15 (P5/32-large)
+
+     [-2, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 8], 1, Concat, [1]],  # cat backbone P4
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, Bottleneck, [512, False]],
+     [-1, 1, ConvNormAct, [256, 1, 1]],
+     [-1, 1, ConvNormAct, [512, 3, 1]],  # 22 (P4/16-medium)
+
+     [-2, 1, ConvNormAct, [128, 1, 1]],
+     [-1, 1, Upsample, [None, 2, 'nearest']],
+     [[-1, 6], 1, Concat, [1]],  # cat backbone P3
+     [-1, 1, Bottleneck, [256, False]],
+     [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
+
+     [[27, 22, 15], 1, YOLOv3Head, [nc, anchors, stride]],   # Detect(P3, P4, P5)
+    ]
+
+
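下面给出一个直接在 py 文件中定义并注册网络的极简示意(非套件源码;register_model 的导入路径与网络结构均为示例假设,请以实际源码为准):
+import mindspore.nn as nn
+from mindyolo.models.registry import register_model  # 导入路径以实际源码为准
+
+class MyTinyNet(nn.Cell):
+    """仅示意结构:实际检测网络还需要构建 neck/head 等模块。"""
+    def __init__(self, in_channels=3, num_classes=80, **kwargs):
+        super().__init__()
+        self.num_classes = num_classes
+        self.stem = nn.SequentialCell(
+            nn.Conv2d(in_channels, 16, 3, stride=2, pad_mode="same"),
+            nn.BatchNorm2d(16),
+            nn.ReLU(),
+        )
+
+    def construct(self, x):
+        return self.stem(x)
+
+@register_model
+def my_tiny_net(cfg=None, in_channels=3, num_classes=None, **kwargs):
+    """注册后即可通过 create_model('my_tiny_net', ...) 构建该网络。"""
+    return MyTinyNet(in_channels=in_channels, num_classes=num_classes or 80, **kwargs)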

损失函数

+

参数说明

+
    +
  • name: 损失函数名称
  • +
  • box: box损失权重
  • +
  • cls: class损失权重
  • +
  • cls_pw: class损失正样本权重
  • +
  • obj: object损失权重
  • +
  • obj_pw: object损失正样本权重
  • +
  • fl_gamma: focal loss gamma
  • +
  • anchor_t: anchor shape比例阈值
  • +
  • label_smoothing: 标签平滑值
  • +
+

yaml文件样例

+

该部分参数在configs/yolov3/hyp.scratch.yaml中定义

+
loss:
+  name: YOLOv7Loss
+  box: 0.05  # box loss gain
+  cls: 0.5  # cls loss gain
+  cls_pw: 1.0  # cls BCELoss positive_weight
+  obj: 1.0  # obj loss gain (scale with pixels)
+  obj_pw: 1.0  # obj BCELoss positive_weight
+  fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+  anchor_t: 4.0  # anchor-multiple threshold
+  label_smoothing: 0.0 # label smoothing epsilon
+
+

优化器

+

参数说明

+
    +
  • optimizer: 优化器名称。
  • +
  • lr_init: 学习率初始值
  • +
  • warmup_epochs: warmup epoch数
  • +
  • warmup_momentum: warmup momentum初始值
  • +
  • warmup_bias_lr: warmup bias学习率初始值
  • +
  • min_warmup_step: 最小warmup step数
  • +
  • group_param: 参数分组策略
  • +
  • gp_weight_decay: 分组参数权重衰减系数
  • +
  • start_factor: 初始学习率因数
  • +
  • end_factor: 结束学习率因数
  • +
  • momentum:移动平均的动量
  • +
  • loss_scale:loss缩放系数
  • +
  • nesterov:是否使用Nesterov Accelerated Gradient (NAG)算法更新梯度。
  • +
+

yaml文件样例

+

该部分参数在configs/yolov3/hyp.scratch.yaml中定义,如下示例中经过warmup阶段后的初始学习率为lr_init * start_factor = 0.01 * 1.0 = 0.01, 最终学习率为lr_init * end_factor = 0.01 * 0.01 = 0.0001

+
optimizer:
+  optimizer: momentum
+  lr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+  momentum: 0.937  # SGD momentum/Adam beta1
+  nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm
+  loss_scale: 1.0 # loss scale for optimizer
+  warmup_epochs: 3  # warmup epochs (fractions ok)
+  warmup_momentum: 0.8  # warmup initial momentum
+  warmup_bias_lr: 0.1  # warmup initial bias lr
+  min_warmup_step: 1000 # minimum warmup step
+  group_param: yolov7 # group param strategy
+  gp_weight_decay: 0.0005  # group param weight decay 5e-4
+  start_factor: 1.0
+  end_factor: 0.01
+
+ + + + + + +
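为便于理解 start_factor 与 end_factor 的数值关系,下面给出一个线性衰减的简化示意(非套件源码,仅用于说明上文的计算方式):
+def linear_lr(lr_init, start_factor, end_factor, step, total_steps):
+    """warmup 之后由 lr_init*start_factor 线性过渡到 lr_init*end_factor(示意)。"""
+    t = min(max(step / max(total_steps - 1, 1), 0.0), 1.0)
+    return lr_init * (start_factor + (end_factor - start_factor) * t)
+
+# 例:lr_init=0.01, start_factor=1.0, end_factor=0.01
+print(linear_lr(0.01, 1.0, 0.01, 0, 300))    # 0.01,对应warmup之后的初始学习率
+print(linear_lr(0.01, 1.0, 0.01, 299, 300))  # ≈ 0.0001,对应最终学习率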
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/data_augmentation/index.html b/zh/tutorials/data_augmentation/index.html new file mode 100644 index 00000000..a23a46cb --- /dev/null +++ b/zh/tutorials/data_augmentation/index.html @@ -0,0 +1,1375 @@ + + + + + + + + + + + + + + + + + + + + + + + + 数据增强 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

数据增强

+

套件自带的数据增强方法清单

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| 数据增强方法名 | 概要解释 |
| --- | --- |
| mosaic | 随机选择mosaic4和mosaic9 |
| mosaic4 | 4分格拼接 |
| mosaic9 | 9分格拼接 |
| mixup | 对两个图像进行线性混合 |
| pastein | 剪贴增强 |
| random_perspective | 随机透视变换 |
| hsv_augment | 随机颜色变换 |
| fliplr | 水平翻转 |
| flipud | 垂直翻转 |
| letterbox | 缩放和填充 |
| label_norm | 标签归一化,坐标归一化到0-1的范围 |
| label_pad | 将标签信息填充为固定大小的数组 |
| image_norm | 图像数据标准化 |
| image_transpose | 通道转置和维度转置 |
| albumentations | albumentations数据增强 |
+

这些数据增强函数定义在 mindyolo/data/dataset.py 中。

+

使用方法

+

MindYOLO数据增强方法通过在yaml文件里配置。例如,训练过程添加一个数据增强,需要在yaml文件data.train_transforms字段下添加一个字典列表,数据增强方法自上而下依次罗列。

+

一个典型的数据增强方法配置字典里必须有func_name,表示应用的数据增强方法名,而后罗列该方法需要设置的参数,若没有在数据增强配置字典中配置参数项,则会选择该数据增强方法默认的数值。

+

数据增强通用配置字典: +

- {func_name: 数据增强方法名1, args11=x11, args12=x12, ..., args1n=x1n}
+- {func_name: 数据增强方法名2, args21=x21, args22=x22, ..., args2n=x2n}
+...
+- {func_name: 数据增强方法名n, argsn1=xn1, argsn2=xn2, ..., argsnn=xnn}
+

+

以YOLOv7训练数据增强示例: +

# 文件目录:configs/yolov7/hyp.scratch.tiny.yaml (https://github.com/mindspore-lab/mindyolo/blob/master/configs/yolov7/hyp.scratch.tiny.yaml)
+  train_transforms:
+    - {func_name: mosaic, prob: 1.0, mosaic9_prob: 0.2, translate: 0.1, scale: 0.5}
+    - {func_name: mixup, prob: 0.05, alpha: 8.0, beta: 8.0, needed_mosaic: True}
+    - {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}
+    - {func_name: pastein, prob: 0.05, num_sample: 30}
+    - {func_name: label_norm, xyxy2xywh_: True}
+    - {func_name: fliplr, prob: 0.5}
+    - {func_name: label_pad, padding_size: 160, padding_value: -1}
+    - {func_name: image_norm, scale: 255.}
+    - {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+
+注意:func_name表示数据增强方法名,prob,mosaic9_prob,translate,scale为该方法参数。 其中prob为所有方法均有的参数,表示该数据增强方法的执行概率,默认值为1

+

上述yaml文件执行的具体操作如下(列表之后附有一个简化的调度示意):

+
    +
  • +

    mosaic:以1.0的概率对输入的图片进行mosaic操作,即将4张不同的图片拼接成一张图片。mosaic9_prob表示使用9宫格方式进行拼接的概率,translate和scale分别表示随机平移和缩放的程度。 +如图所示: +

    +
  • +
  • +

    mixup:以0.05的概率对输入的图片进行mixup操作,即将两张不同的图片进行混合。其中alpha和beta表示混合系数,needed_mosaic表示是否需要使用mosaic进行混合。

    +
  • +
  • +

    hsv_augment: HSV增强, 以1.0的概率对输入的图片进行HSV颜色空间的调整,增加数据多样性。其中hgain、sgain和vgain分别表示对H、S、V通道的调整程度。

    +
  • +
  • +

    pastein:以0.05的概率在输入的图片中随机贴入一些样本。其中num_sample表示随机贴入的样本数量。

    +
  • +
  • +

    label_norm:将输入的标签从(x1, y1, x2, y2)的格式转换为(x, y, w, h)的格式。

    +
  • +
  • +

    fliplr:以0.5的概率对输入的图片进行水平翻转,增加数据多样性。

    +
  • +
  • +

    label_pad:对输入的标签进行填充,使得每个图片都有相同数量的标签。padding_size表示填充后标签的数量,padding_value表示填充的值。

    +
  • +
  • +

    image_norm:将输入的图片像素值从[0, 255]范围内缩放到[0, 1]范围内。

    +
  • +
  • +

    image_transpose:将输入的图片从BGR格式转换为RGB格式,并将图片的通道数从HWC格式转换为CHW格式。

    +
  • +
+
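上面列出的各项增强在套件内部按配置顺序依次调度执行,下面给出一个简化的调度示意(非套件源码,省略了 copy_paste、albumentations 等特殊分支):
+import random
+
+def apply_transforms(dataset, sample, transforms):
+    """transforms 即 yaml 中的字典列表;按顺序以 prob 概率调用 dataset 上的同名增强方法。"""
+    for trans in transforms:
+        trans = dict(trans)                 # 拷贝一份,避免修改原配置
+        func_name = trans.pop("func_name")
+        prob = trans.pop("prob", 1.0)
+        if random.random() < prob:
+            sample = getattr(dataset, func_name)(sample, **trans)
+    return sample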

测试数据增强需要用test_transforms字段标注,配置方法同训练。

+

自定义数据增强

+

编写指南:

+
    +
  • mindyolo/data/dataset.py文件COCODataset类中添加自定义数据增强方法
  • +
  • 数据增强方法的输入通常包含图片、标签和自定义参数。
  • +
  • 编写函数体内容,自定义输出
  • +
+

一个典型的数据增强方法: +

#在mindyolo/data/dataset.py COCODataset 添加子方法
+    def data_trans_func(self, image, labels, args1=x1, args2=x2, ..., argsn=xn):
+        # 数据增强逻辑
+        ......
+        return image, labels
+
+自定义一个功能为旋转的数据增强函数 +
#mindyolo/data/dataset.py
+    def rotate(self, image, labels, angle):
+        # rotate image
+        image = np.rot90(image, angle // 90)
+        if len(labels):
+            if angle == 90:
+                labels[:, 0], labels[:, 1] = 1 - labels[:, 1], labels[:, 0]
+            elif angle == 180:
+                labels[:, 0], labels[:, 1] = 1 - labels[:, 0], 1 - labels[:, 1]
+            elif angle == 270:
+                labels[:, 0], labels[:, 1] = labels[:, 1], 1 - labels[:, 0]
+        return image, labels
+

+

使用指南:
+- 在模型的yaml文件中,以字典的形式定义此数据增强方法,与上文所述用法一致

    - {func_name: rotate, angle: 90}
+

+

效果展示:

+

     

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/deployment/index.html b/zh/tutorials/deployment/index.html new file mode 100644 index 00000000..0a5f3b56 --- /dev/null +++ b/zh/tutorials/deployment/index.html @@ -0,0 +1,2069 @@ + + + + + + + + + + + + + + + + + + + + + + + + 部署 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

部署

+

依赖

+
pip install -r requirement.txt
+
+

MindSpore Lite环境准备

+

参考:Lite环境配置
+ 注意:MindSpore Lite适配的python环境为3.7,请在安装Lite前准备好python3.7的环境

+
    +
  1. +

    根据环境,下载配套的tar.gz包和whl包

    +
  2. +
  3. +

    解压tar.gz包并安装对应版本的whl包 +

    tar -zxvf mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.tar.gz
    +pip install mindspore_lite-2.0.0a0-cp37-cp37m-{os}_{platform}_64.whl
    +

    +
  4. +
  5. 配置Lite的环境变量 + LITE_HOME为tar.gz解压出的文件夹路径,推荐使用绝对路径 +
    export LITE_HOME=/path/to/mindspore-lite-{version}-{os}-{platform}
    +export LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH
    +export PATH=$LITE_HOME/tools/converter/converter:$LITE_HOME/tools/benchmark:$PATH
    +
  6. +
+

快速开始

+

模型转换

+

ckpt模型转为mindir模型,此步骤可在CPU/Ascend910上运行 +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target [CPU/Ascend]
+e.g.
+# 在CPU上运行
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+# 在Ascend上运行
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target Ascend
+

+

Lite Test

+
python deploy/test.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_config/yolo.yaml
+e.g.
+python deploy/test.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml
+
+

Lite Predict

+
python ./deploy/predict.py --model_type Lite --model_path ./path_to_mindir/weight.mindir --config ./path_to_conifg/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type Lite --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+
+

脚本说明

+
    +
  • predict.py 支持单张图片推理
  • +
  • test.py 支持COCO数据集推理
  • +
  • 注意:当前只支持在Ascend 310上推理
  • +
+

MindX部署

+

环境配置

+

参考:MindX环境准备
+注意:MindX目前支持的python版本为3.9,请在安装MindX前,准备好python3.9的环境

+
    +
  1. +

    在MindX官网获取环境安装包,目前支持3.0.0版本MindX推理

    +
  2. +
  3. +

    跳转至下载页面下载Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run

    +
  4. +
  5. +

    将安装包放置于Ascend310机器目录中并解压

    +
  6. +
  7. +

    如不是root用户,需增加对套件包的可执行权限: +

    chmod +x Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run
    +

    +
  8. +
  9. 进入开发套件包的上传路径,安装mxManufacture开发套件包。 +
    ./Ascend-mindxsdk-mxmanufacture_{version}_linux-{arch}.run --install
    +
    +安装完成后,若出现如下回显,表示软件成功安装。 +
    The installation is successfully
    +
    +安装完成后,mxManufacture软件目录结构如下所示: +
    .
    +├── bin
    +├── config
    +├── filelist.txt
    +├── include
    +├── lib
    +├── opensource
    +├── operators
    +├── python
    +├── samples
    +├── set_env.sh
    +├── toolkit
    +└── version.info
    +
  10. +
  11. 进入mxmanufacture的安装目录,运行以下命令,使MindX SDK环境变量生效。 +
    source set_env.sh
    +
  12. +
  13. 进入./mxVision-3.0.0/python/,安装mindx-3.0.0-py3-none-any.whl +
    pip install mindx-3.0.0-py3-none-any.whl
    +
  14. +
+

模型转换

+
    +
  1. +

    ckpt模型转为air模型,此步骤需要在Ascend910上操作 +

    python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format AIR
    +e.g.
    +python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format AIR
    +
    + yolov7需要在2.0版本以上的Ascend910机器运行export

    +
  2. +
  3. +

    air模型转为om模型,使用atc转换工具,此步骤需安装MindX环境,在Ascend310上运行 +

    atc --model=./path_to_air/weight.air --framework=1 --output=yolo  --soc_version=Ascend310
    +

    +
  4. +
+

MindX Test

+

对COCO数据推理: +

python ./deploy/test.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml
+e.g.
+python ./deploy/test.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml
+

+

MindX Predict

+

对单张图片推理: +

python ./deploy/predict.py --model_type MindX --model_path ./path_to_om/weight.om --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type MindX --model_path ./yolov5n.om --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+

+

MindIR部署

+

环境要求

+

mindspore>=2.1

+

注意事项

+
    +
  1. +

    当前仅支持Predict

    +
  2. +
  3. +

    理论上也可在Ascend910上运行,未测试

    +
  4. +
+

模型转换

+

ckpt模型转为mindir模型,此步骤可在CPU上运行 +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+e.g.
+# 在CPU上运行
+python ./deploy/export.py --config ./configs/yolov5/yolov5n.yaml --weight yolov5n_300e_mAP273-9b16bd7b.ckpt --per_batch_size 1 --file_format MINDIR --device_target CPU
+

+

MindIR Test

+

敬请期待

+

MindIR Predict

+

对单张图片推理: +

python ./deploy/predict.py --model_type MindIR --model_path ./path_to_mindir/weight.mindir --config ./path_to_conifg/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python deploy/predict.py --model_type MindIR --model_path ./yolov5n.mindir --config ./configs/yolov5/yolov5n.yaml --image_path ./coco/image/val2017/image.jpg
+

+

ONNX部署

+

注意: 仅部分模型支持导出ONNX并使用ONNXRuntime进行部署

+

环境配置

+
pip install onnx>=1.9.0
+pip install onnxruntime>=1.8.0
+
+

注意事项

+
    +
  1. +

    当前并非所有mindyolo均支持ONNX导出和推理(仅以YoloV3为例)

    +
  2. +
  3. +

    当前仅支持Predict功能

    +
  4. +
  5. +

    导出ONNX需要调整nn.SiLU算子,采用sigmoid算子底层实现

    +
  6. +
+

例如:添加如下自定义层并替换mindyolo中所有的nn.SiLU +

class EdgeSiLU(nn.Cell):
+    """
+    SiLU activation function: x * sigmoid(x). To support for onnx export with nn.SiLU.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def construct(self, x):
+        return x * ops.sigmoid(x)
+

+

模型转换

+

ckpt模型转为ONNX模型,此步骤以及Test步骤均仅支持CPU上运行 +

python ./deploy/export.py --config ./path_to_config/model.yaml --weight ./path_to_ckpt/weight.ckpt --per_batch_size 1 --file_format ONNX --device_target [CPU]
+e.g.
+# 在CPU上运行
+python ./deploy/export.py --config ./configs/yolov3/yolov3.yaml --weight yolov3-darknet53_300e_mAP455-adfb27af.ckpt --per_batch_size 1 --file_format ONNX --device_target CPU
+

+

ONNX Test

+

敬请期待

+

ONNXRuntime Predict

+

对单张图片推理: +

python ./deploy/predict.py --model_type ONNX --model_path ./path_to_onnx_model/model.onnx --config ./path_to_config/yolo.yaml --image_path ./path_to_image/image.jpg
+e.g.
+python ./deploy/predict.py --model_type ONNX --model_path ./yolov3.onnx --config ./configs/yolov3/yolov3.yaml --image_path ./coco/image/val2017/image.jpg
+

+
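作为参考,下面给出一个直接使用 ONNXRuntime 加载导出模型并推理的最小示意(与 predict.py 的具体实现无关;输入 shape、预处理与后处理均从略,仅为假设):
+import numpy as np
+import onnxruntime as ort
+
+session = ort.InferenceSession("./yolov3.onnx", providers=["CPUExecutionProvider"])
+input_name = session.get_inputs()[0].name
+
+# 假设输入为 1x3x640x640、已完成 letterbox、归一化与 HWC->CHW 转换的图像
+dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
+outputs = session.run(None, {input_name: dummy})
+print([o.shape for o in outputs])  # 网络原始输出,仍需经过 NMS 等后处理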

标准和支持的模型库

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Name | Scale | Context | ImageSize | Dataset | Box mAP (%) | Params | FLOPs | Recipe | Download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| YOLOv8 | N | D310x1-G | 640 | MS COCO 2017 | 37.2 | 3.2M | 8.7G | yaml | ckpt / mindir |
| YOLOv8 | S | D310x1-G | 640 | MS COCO 2017 | 44.6 | 11.2M | 28.6G | yaml | ckpt / mindir |
| YOLOv8 | M | D310x1-G | 640 | MS COCO 2017 | 50.5 | 25.9M | 78.9G | yaml | ckpt / mindir |
| YOLOv8 | L | D310x1-G | 640 | MS COCO 2017 | 52.8 | 43.7M | 165.2G | yaml | ckpt / mindir |
| YOLOv8 | X | D310x1-G | 640 | MS COCO 2017 | 53.7 | 68.2M | 257.8G | yaml | ckpt / mindir |
| YOLOv7 | Tiny | D310x1-G | 640 | MS COCO 2017 | 37.5 | 6.2M | 13.8G | yaml | ckpt / mindir |
| YOLOv7 | L | D310x1-G | 640 | MS COCO 2017 | 50.8 | 36.9M | 104.7G | yaml | ckpt / mindir |
| YOLOv7 | X | D310x1-G | 640 | MS COCO 2017 | 52.4 | 71.3M | 189.9G | yaml | ckpt / mindir |
| YOLOv5 | N | D310x1-G | 640 | MS COCO 2017 | 27.3 | 1.9M | 4.5G | yaml | ckpt / mindir |
| YOLOv5 | S | D310x1-G | 640 | MS COCO 2017 | 37.6 | 7.2M | 16.5G | yaml | ckpt / mindir |
| YOLOv5 | M | D310x1-G | 640 | MS COCO 2017 | 44.9 | 21.2M | 49.0G | yaml | ckpt / mindir |
| YOLOv5 | L | D310x1-G | 640 | MS COCO 2017 | 48.5 | 46.5M | 109.1G | yaml | ckpt / mindir |
| YOLOv5 | X | D310x1-G | 640 | MS COCO 2017 | 50.5 | 86.7M | 205.7G | yaml | ckpt / mindir |
| YOLOv4 | CSPDarknet53 | D310x1-G | 608 | MS COCO 2017 | 45.4 | 27.6M | 52G | yaml | ckpt / mindir |
| YOLOv4 | CSPDarknet53(silu) | D310x1-G | 640 | MS COCO 2017 | 45.8 | 27.6M | 52G | yaml | ckpt / mindir |
| YOLOv3 | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 45.5 | 61.9M | 156.4G | yaml | ckpt / mindir |
| YOLOX | N | D310x1-G | 416 | MS COCO 2017 | 24.1 | 0.9M | 1.1G | yaml | ckpt / mindir |
| YOLOX | Tiny | D310x1-G | 416 | MS COCO 2017 | 33.3 | 5.1M | 6.5G | yaml | ckpt / mindir |
| YOLOX | S | D310x1-G | 640 | MS COCO 2017 | 40.7 | 9.0M | 26.8G | yaml | ckpt / mindir |
| YOLOX | M | D310x1-G | 640 | MS COCO 2017 | 46.7 | 25.3M | 73.8G | yaml | ckpt / mindir |
| YOLOX | L | D310x1-G | 640 | MS COCO 2017 | 49.2 | 54.2M | 155.6G | yaml | ckpt / mindir |
| YOLOX | X | D310x1-G | 640 | MS COCO 2017 | 51.6 | 99.1M | 281.9G | yaml | ckpt / mindir |
| YOLOX | Darknet53 | D310x1-G | 640 | MS COCO 2017 | 47.7 | 63.7M | 185.3G | yaml | ckpt / mindir |
+


+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/finetune/index.html b/zh/tutorials/finetune/index.html new file mode 100644 index 00000000..826252d6 --- /dev/null +++ b/zh/tutorials/finetune/index.html @@ -0,0 +1,1381 @@ + + + + + + + + + + + + + + + + + + + + + + + + 微调 - MindYOLO Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

微调

+

自定义数据集finetune流程

+

本文以安全帽佩戴检测数据集(SHWD)为例,介绍自定义数据集在MindYOLO上进行finetune的主要流程。

+

数据集格式转换

+

SHWD数据集采用voc格式的数据标注,其文件目录如下所示: +

             ROOT_DIR
+                ├── Annotations
+                │        ├── 000000.xml
+                │        └── 000002.xml
+                ├── ImageSets
+                │       └── Main
+                │             ├── test.txt
+                │             ├── train.txt
+                │             ├── trainval.txt
+                │             └── val.txt
+                └── JPEGImages
+                        ├── 000000.jpg
+                        └── 000002.jpg
+
+Annotations文件夹下的xml文件为每张图片的标注信息,主要内容如下: +
<annotation>
+  <folder>JPEGImages</folder>
+  <filename>000377.jpg</filename>
+  <path>F:\baidu\VOC2028\JPEGImages\000377.jpg</path>
+  <source>
+    <database>Unknown</database>
+  </source>
+  <size>
+    <width>750</width>
+    <height>558</height>
+    <depth>3</depth>
+  </size>
+  <segmented>0</segmented>
+  <object>
+    <name>hat</name>
+    <pose>Unspecified</pose>
+    <truncated>0</truncated>
+    <difficult>0</difficult>
+    <bndbox>
+      <xmin>142</xmin>
+      <ymin>388</ymin>
+      <xmax>177</xmax>
+      <ymax>426</ymax>
+    </bndbox>
+  </object>
+
+其中包含多个object, object中的name为类别名称,xmin, ymin, xmax, ymax则为检测框左上角和右下角的坐标。

+

MindYOLO支持的数据集格式为YOLO格式,详情可参考数据准备

+

由于MindYOLO在验证阶段选用图片名称作为image_id,因此图片名称只能为数值类型,而不能为字符串类型,还需要对图片进行改名。对SHWD数据集格式的转换包含如下步骤:
+* 将图片复制到相应的路径下并改名
+* 在根目录下相应的txt文件中写入该图片的相对路径
+* 解析xml文件,在相应路径下生成对应的txt标注文件
+* 验证集还需生成最终的json文件

+

详细实现可参考convert_shwd2yolo.py,运行方式如下:

+

python examples/finetune_SHWD/convert_shwd2yolo.py --root_dir /path_to_shwd/SHWD
+
+运行以上命令将在不改变原数据集的前提下,在同级目录生成yolo格式的SHWD数据集。
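作为参考,下面给出一个把单个 voc xml 标注转换为 yolo txt 标注的简化示意(非 convert_shwd2yolo.py 源码,类别映射与路径均为示例假设):
+import xml.etree.ElementTree as ET
+
+def voc_xml_to_yolo_txt(xml_path, txt_path, class_map):
+    """class_map 形如 {'person': 0, 'hat': 1};输出每行: class x_center y_center width height(均归一化到 0-1)。"""
+    root = ET.parse(xml_path).getroot()
+    w = float(root.find("size/width").text)
+    h = float(root.find("size/height").text)
+    lines = []
+    for obj in root.iter("object"):
+        cls_id = class_map[obj.find("name").text]
+        box = obj.find("bndbox")
+        x1, y1 = float(box.find("xmin").text), float(box.find("ymin").text)
+        x2, y2 = float(box.find("xmax").text), float(box.find("ymax").text)
+        xc, yc = (x1 + x2) / 2 / w, (y1 + y2) / 2 / h
+        bw, bh = (x2 - x1) / w, (y2 - y1) / h
+        lines.append(f"{cls_id} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")
+    with open(txt_path, "w") as f:
+        f.write("\n".join(lines))
+
+# voc_xml_to_yolo_txt("Annotations/000377.xml", "labels/000377.txt", {"person": 0, "hat": 1})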

+

编写yaml配置文件

+

配置文件主要包含数据集、数据增强、loss、optimizer、模型结构涉及的相应参数,由于MindYOLO提供yaml文件继承机制,可只将需要调整的参数编写为yolov7-tiny_shwd.yaml,并继承MindYOLO提供的原生yaml文件即可,其内容如下: +

__BASE__: [
+  '../../configs/yolov7/yolov7-tiny.yaml',
+]
+
+per_batch_size: 16 # 单卡batchsize,总的batchsize=per_batch_size * device_num
+img_size: 640 # image sizes
+weight: ./yolov7-tiny_pretrain.ckpt
+strict_load: False # 是否按严格加载ckpt内参数,默认True,若设成False,当分类数不一致,丢掉最后一层分类器的weight
+log_interval: 10 # 每log_interval次迭代打印一次loss结果
+
+data:
+  dataset_name: shwd
+  train_set: ./SHWD/train.txt # 实际训练数据路径
+  val_set: ./SHWD/val.txt
+  test_set: ./SHWD/val.txt
+  nc: 2 # 分类数
+  # class names
+  names: [ 'person',  'hat' ] # 每一类的名字
+
+optimizer:
+  lr_init: 0.001  # initial learning rate
+
+* __BASE__为一个列表,表示继承的yaml文件所在路径,可以继承多个yaml文件
+* per_batch_size和img_size分别表示单卡上的batch_size和数据处理图片采用的图片尺寸
+* weight为上述提到的预训练模型的文件路径,strict_load表示丢弃shape不一致的参数
+* log_interval表示日志打印间隔
+* data字段下全部为数据集相关参数,其中dataset_name为自定义数据集名称,train_set、val_set、test_set分别为保存训练集、验证集、测试集图片路径的txt文件路径,nc为类别数量,names为类别名称
+* optimizer字段下的lr_init为经过warm_up之后的初始化学习率,此处相比默认参数缩小了10倍

The inheritance rules and a description of every parameter are documented in Configuration; a rough sketch of how __BASE__ merging behaves is shown below.
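The following is only a conceptual sketch of yaml inheritance, not MindYOLO's actual config loader; in particular it ignores that the __BASE__ paths are resolved relative to the child file:

```python
import yaml

def deep_update(dst: dict, src: dict) -> dict:
    """Recursively merge src into dst; values from src win on conflicts."""
    for key, value in src.items():
        if isinstance(value, dict) and isinstance(dst.get(key), dict):
            deep_update(dst[key], value)
        else:
            dst[key] = value
    return dst

def load_config(path: str) -> dict:
    """Resolve __BASE__ inheritance: bases are merged first, then the child overrides them."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    merged: dict = {}
    for base_path in cfg.pop("__BASE__", []):
        deep_update(merged, load_config(base_path))
    return deep_update(merged, cfg)
```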

Downloading a Pretrained Model

Models from the MindYOLO model zoo can serve as pretrained models for a custom dataset. They already reach good accuracy on COCO, so compared with training from scratch, starting from a pretrained model usually converges faster, reaches higher final accuracy, and largely avoids problems such as vanishing or exploding gradients caused by poor initialization.

The number of classes in a custom dataset usually differs from COCO, and the detection head of every MindYOLO model depends on the number of classes, so importing the pretrained checkpoint directly may fail because of shape mismatches. Set strict_load to False in the yaml configuration: MindYOLO then automatically discards the mismatched parameters and raises a warning that those module parameters were not loaded.
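Conceptually, this non-strict loading behaves like the sketch below; it is a simplified illustration built on the MindSpore checkpoint API rather than MindYOLO's exact implementation:

```python
import mindspore as ms

def load_pretrained_loose(network, ckpt_path):
    """Load a checkpoint but skip parameters whose shapes do not match the network."""
    ckpt = ms.load_checkpoint(ckpt_path)
    net_params = {p.name: p for p in network.get_parameters()}
    kept, dropped = {}, []
    for name, param in ckpt.items():
        if name in net_params and net_params[name].shape == param.shape:
            kept[name] = param
        else:
            dropped.append(name)   # e.g. the final classifier weights when nc differs from COCO
    ms.load_param_into_net(network, kept)
    if dropped:
        print(f"Warning: {len(dropped)} checkpoint parameters were not loaded: {dropped}")
```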

Model Fine-tuning (Finetune)

During fine-tuning, start with the default configuration; if the results are not satisfactory, consider adjusting the following parameters (an illustrative override snippet follows the list):
* lower the learning rate so that the loss converges more easily
* set per_batch_size according to the actual memory usage; larger batches generally give more accurate gradient estimates
* adjust epochs depending on whether the loss has converged
* adjust the anchors to the actual object sizes
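For example, such overrides might be added to yolov7-tiny_shwd.yaml; the values below are purely illustrative, not tuned recommendations:

```yaml
epochs: 100            # shorten or extend depending on whether the loss has converged
per_batch_size: 8      # reduce if device memory is tight
optimizer:
  lr_init: 0.0005      # lower further if the loss oscillates
# anchors can likewise be overridden; check the inherited
# ../../configs/yolov7/yolov7-tiny.yaml for the exact key layout
```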

Because the SHWD training set contains only about 6,000 images, the yolov7-tiny model is chosen for training.
* Run distributed training on multiple NPU/GPU devices, taking 8 devices as an example:

    mpirun --allow-run-as-root -n 8 python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --is_parallel True

* Train on a single NPU/GPU/CPU device:

    python train.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml

Note: training on SHWD with the default yolov7-tiny parameters reaches an AP50 of 87.0; changing lr_init from 0.01 to 0.001 raises AP50 to 89.2.

Visualizing Inference Results

Use demo/predict.py to run visual inference with the trained model:

    python demo/predict.py --config ./examples/finetune_SHWD/yolov7-tiny_shwd.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg

The predictions are drawn onto the image and saved as the inference result.
\ No newline at end of file
diff --git a/zh/tutorials/modelarts/index.html b/zh/tutorials/modelarts/index.html
new file mode 100644
index 00000000..d6afc915
--- /dev/null
+++ b/zh/tutorials/modelarts/index.html

Quick Start: Training MindYOLO on ModelArts

This tutorial describes how to train MindYOLO with the ModelArts platform. For ModelArts itself, refer to the Help Center.

Preparing Data and Code

Upload the dataset with the OBS service; see the OBS User Guide for the relevant operations. Obtain the AK of your account, and ask the platform administrator or the account owner for the server address; if the AK is not where the user guide says it should be, also consult the platform administrator or account owner.

Steps:
1. Log in to OBS Browser+.
2. Create a bucket and a new folder inside it (for example coco).
3. Upload the data files, keeping them all inside that single folder (coco in this example). The code copies the data from the OBS bucket, and what gets copied is everything under that folder; without such a folder, the complete dataset cannot be selected.

Preparing the Code

Upload the training code with OBS as well: create a bucket, create a new folder (for example mindyolo), and upload the code files. At the same level as mindyolo, create an output folder for the training records and a log folder for the logs.

Creating an Algorithm

1. In the console, choose Algorithm Management -> Create.
2. Give the algorithm a custom name. For the preset framework choose Ascend-Powered-Engine; select the MindSpore-2.0 image for the master branch, or the MindSpore-1.8.1 image for the r0.1 branch. Then set the code directory, boot file, input, output and hyperparameters.

* To load pretrained weights, select the uploaded model file under model selection and add a ckpt_dir parameter to the run arguments.
* The boot file is train.py.
* The run hyperparameters must include enable_modelarts with the value True.
* The config hyperparameter path follows the runtime environment preview of the training job, for example /home/ma-user/modelarts/user-job-dir/mindyolo/configs/yolov5/yolov5n.yaml.
* For distributed training, add the hyperparameter is_parallel and set it to True when running distributed, or False on a single device (a combined example is given after this list).

Creating a Training Job

1. In the ModelArts service choose Training Management -> Training Jobs -> Create Training Job. Set the job name and choose not to include it in an experiment; for the creation method choose My Algorithms and select the algorithm created above.
2. For the training input, set the data storage location to the OBS data bucket created earlier (coco in this example); for the training output select the output folder prepared alongside the code, and set the config hyperparameter according to the runtime environment preview.
3. Choose the resource pool, flavor and number of compute nodes, and set the job log path to the log folder created alongside the code.
4. Submit the training job; after queueing it enters the running state.

Modifying a Job

On the training job page, choose Rebuild to modify the configuration of the selected job.

\ No newline at end of file
diff --git a/zh/tutorials/quick_start/index.html b/zh/tutorials/quick_start/index.html
new file mode 100644
index 00000000..fb87ba8c
--- /dev/null
+++ b/zh/tutorials/quick_start/index.html

Quick Start

Getting Started with MindYOLO

This tutorial briefly introduces the command-line tools built into MindYOLO.

Inference with a Pretrained Model

1. Pick a model and its configuration file from the model zoo, for example ./configs/yolov7/yolov7.yaml.
2. Download the corresponding pretrained checkpoint from the model zoo.
3. Run inference with the built-in configuration:

    # NPU (default)
    python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg

    # GPU
    python demo/predict.py --config ./configs/yolov7/yolov7.yaml --weight=/path_to_ckpt/WEIGHT.ckpt --image_path /path_to_image/IMAGE.jpg --device_target=GPU
For details on the command-line arguments, see demo/predict.py -h or check its source code.

* To run on CPU, set device_target to CPU.
* The results are saved under the ./detect_results directory.

Training and Evaluation from the Command Line

* Prepare your dataset in YOLO format. If you train with the COCO dataset (YOLO format), prepare it from yolov5 or darknet.

    coco/
      {train,val}2017.txt
      annotations/
        instances_{train,val}2017.json
      images/
        {train,val}2017/
            00000001.jpg
            ...
            # image files that are mentioned in the corresponding train/val2017.txt
      labels/
        {train,val}2017/
            00000001.txt
            ...
            # label files that are mentioned in the corresponding train/val2017.txt
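Each file under labels/ contains one line per object in the normalized YOLO format `class_id cx cy w h`, with coordinates relative to the image width and height. A hypothetical 00000001.txt with two objects might look like this (the values are made up for illustration):

```text
0 0.481719 0.634028 0.690625 0.713278
45 0.339438 0.418896 0.478875 0.562009
```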
* Run distributed training on multiple NPU/GPU devices, taking 8 devices as an example:

    mpirun --allow-run-as-root -n 8 python train.py --config ./configs/yolov7/yolov7.yaml --is_parallel True

* Train on a single NPU/GPU/CPU device:

    python train.py --config ./configs/yolov7/yolov7.yaml

* Evaluate the model accuracy on a single NPU/GPU/CPU device:

    python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt

* Evaluate the model accuracy in distributed mode on multiple NPU/GPU devices:

    mpirun --allow-run-as-root -n 8 python test.py --config ./configs/yolov7/yolov7.yaml --weight /path_to_ckpt/WEIGHT.ckpt --is_parallel True
+

注意:

+

(1) 默认超参为8卡训练,单卡情况需调整部分参数。

+

(2) 默认设备为Ascend,您可以指定'device_target'的值为Ascend/GPU/CPU。

+

(3) 有关更多选项,请参阅 train/test.py -h

+

(4) 在CloudBrain上进行训练,请在这里查看

Deployment

See Deployment.

Using the MindYOLO API in Code

Coming soon.

\ No newline at end of file