diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..0b0e6121c --- /dev/null +++ b/404.html @@ -0,0 +1,946 @@ + + + + + + + + + + + + + + + + + + MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ +

404 - Not found

+ +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/__pycache__/gen_ref_pages.cpython-38.pyc b/__pycache__/gen_ref_pages.cpython-38.pyc new file mode 100644 index 000000000..fa205b2c7 Binary files /dev/null and b/__pycache__/gen_ref_pages.cpython-38.pyc differ diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css new file mode 100644 index 000000000..049a254b9 --- /dev/null +++ b/assets/_mkdocstrings.css @@ -0,0 +1,64 @@ + +/* Avoid breaking parameter names, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* No line break before first paragraph of descriptions. */ +.doc-md-description, +.doc-md-description>p:first-child { + display: inline; +} + +/* Max width for docstring sections tables. */ +.doc .md-typeset__table, +.doc .md-typeset__table table { + display: table !important; + width: 100%; +} + +.doc .md-typeset__table tr { + display: table-row; +} + +/* Defaults in Spacy table style. */ +.doc-param-default { + float: right; +} + +/* Keep headings consistent. */ +h1.doc-heading, +h2.doc-heading, +h3.doc-heading, +h4.doc-heading, +h5.doc-heading, +h6.doc-heading { + font-weight: 400; + line-height: 1.5; + color: inherit; + text-transform: none; +} + +h1.doc-heading { + font-size: 1.6rem; +} + +h2.doc-heading { + font-size: 1.2rem; +} + +h3.doc-heading { + font-size: 1.15rem; +} + +h4.doc-heading { + font-size: 1.10rem; +} + +h5.doc-heading { + font-size: 1.05rem; +} + +h6.doc-heading { + font-size: 1rem; +} \ No newline at end of file diff --git a/assets/images/favicon.png b/assets/images/favicon.png new file mode 100644 index 000000000..1cf13b9f9 Binary files /dev/null and b/assets/images/favicon.png differ diff --git a/assets/javascripts/bundle.220ee61c.min.js b/assets/javascripts/bundle.220ee61c.min.js new file mode 100644 index 000000000..116072a11 --- /dev/null +++ b/assets/javascripts/bundle.220ee61c.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var Ci=Object.create;var gr=Object.defineProperty;var Ri=Object.getOwnPropertyDescriptor;var ki=Object.getOwnPropertyNames,Ht=Object.getOwnPropertySymbols,Hi=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,nn=Object.prototype.propertyIsEnumerable;var rn=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&rn(e,r,t[r]);if(Ht)for(var r of Ht(t))nn.call(t,r)&&rn(e,r,t[r]);return e};var on=(e,t)=>{var r={};for(var n in e)yr.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&Ht)for(var n of Ht(e))t.indexOf(n)<0&&nn.call(e,n)&&(r[n]=e[n]);return r};var Pt=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Pi=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of ki(t))!yr.call(e,o)&&o!==r&&gr(e,o,{get:()=>t[o],enumerable:!(n=Ri(t,o))||n.enumerable});return e};var yt=(e,t,r)=>(r=e!=null?Ci(Hi(e)):{},Pi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var sn=Pt((xr,an)=>{(function(e,t){typeof xr=="object"&&typeof an!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(xr,function(){"use strict";function e(r){var n=!0,o=!1,i=null,s={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function a(O){return!!(O&&O!==document&&O.nodeName!=="HTML"&&O.nodeName!=="BODY"&&"classList"in O&&"contains"in O.classList)}function f(O){var Qe=O.type,De=O.tagName;return!!(De==="INPUT"&&s[Qe]&&!O.readOnly||De==="TEXTAREA"&&!O.readOnly||O.isContentEditable)}function 
e=b(window,"keydown").pipe(A(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:no("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),A(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!ka(n,r)}return!0}),pe());return Ha().pipe(g(t=>t?M:e))}function le(){return new URL(location.href)}function ot(e){location.href=e.href}function io(){return new x}function ao(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)ao(e,r)}function _(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)ao(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function so(){return location.hash.substring(1)}function Dr(e){let t=_("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Pa(e){return L(b(window,"hashchange"),e).pipe(l(so),V(so()),A(t=>t.length>0),X(1))}function co(e){return Pa(e).pipe(l(t=>ce(`[id="${t}"]`)),A(t=>typeof t!="undefined"))}function Vr(e){let t=matchMedia(e);return er(r=>t.addListener(()=>r(t.matches))).pipe(V(t.matches))}function fo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(V(e.matches))}function zr(e,t){return e.pipe(g(r=>r?t():M))}function ur(e,t={credentials:"same-origin"}){return ue(fetch(`${e}`,t)).pipe(fe(()=>M),g(r=>r.status!==200?Ot(()=>new Error(r.statusText)):k(r)))}function We(e,t){return ur(e,t).pipe(g(r=>r.json()),X(1))}function uo(e,t){let r=new DOMParser;return ur(e,t).pipe(g(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),X(1))}function pr(e){let t=_("script",{src:e});return $(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(g(()=>Ot(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),R(()=>document.head.removeChild(t)),ge(1))))}function po(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function lo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(po),V(po()))}function mo(){return{width:innerWidth,height:innerHeight}}function ho(){return b(window,"resize",{passive:!0}).pipe(l(mo),V(mo()))}function bo(){return G([lo(),ho()]).pipe(l(([e,t])=>({offset:e,size:t})),X(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(ee("size")),o=G([n,r]).pipe(l(()=>Xe(e)));return G([r,t,o]).pipe(l(([{height:i},{offset:s,size:a},{x:f,y:c}])=>({offset:{x:s.x-f,y:s.y-c+i},size:a})))}(()=>{function e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(s=>{let a=document.createElement("script");a.src=i,a.onload=s,document.body.appendChild(a)})),Promise.resolve())}var r=class extends EventTarget{constructor(n){super(),this.url=n,this.m=i=>{i.source===this.w&&(this.dispatchEvent(new MessageEvent("message",{data:i.data})),this.onmessage&&this.onmessage(i))},this.e=(i,s,a,f,c)=>{if(s===`${this.url}`){let u=new ErrorEvent("error",{message:i,filename:s,lineno:a,colno:f,error:c});this.dispatchEvent(u),this.onerror&&this.onerror(u)}};let o=document.createElement("iframe");o.hidden=!0,document.body.appendChild(this.iframe=o),this.w.document.open(),this.w.document.write(` + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Write A New Model

+

This document provides a reference template for writing the model definition file model.py in MindSpore, with the aim of maintaining a unified code style.

+

Next, let's take MLP-Mixer as an example.

+

File Header

+

Begin the file with a brief description that includes the model name and the title of the paper it implements, as follows:

+
"""
+MindSpore implementation of `${MODEL_NAME}`.
+Refer to ${PAPER_NAME}.
+"""
+
+

Module Import

+

There are three types of module imports, ordered as follows:

+
    +
  • Python built-in or third-party libraries, e.g., import math and import numpy as np. These should be placed in the first group.
  • +
  • MindSpore-related modules, e.g., import mindspore.nn as nn and import mindspore.ops as ops. These should be placed in the second group.
  • +
  • Modules within the MindCV package, e.g., from .layers.classifier import ClassifierHead. These should be placed in the third group and imported with relative imports.
  • +
+

Examples are as follows:

+
import math
+from collections import OrderedDict
+
+import mindspore.nn as nn
+import mindspore.ops as ops
+import mindspore.common.initializer as init
+
+from .utils import load_pretrained
+from .layers.classifier import ClassifierHead
+
+

Import only the modules or packages that are actually needed, and avoid importing unused packages.

+

__all__

+
+

Python has no native visibility control; visibility is maintained by a set of conventions that everyone is expected to follow. __all__ is such a convention: it acts as a "whitelist" that exposes a module's public interface. If __all__ is defined and another file imports this module with from xxx import *, only the members listed in __all__ are imported, and all other members are excluded.

+
+

By convention, the interfaces exposed by a model file are the main model class and the functions that return models of different specifications, for example:

+
__all__ = [
+    "MLPMixer",
+    "mlp_mixer_s_p32",
+    "mlp_mixer_s_p16",
+    ...
+]
+
+

Here MLPMixer is the main model class, and mlp_mixer_s_p32 and mlp_mixer_s_p16 are functions that return models of different specifications. Generally speaking, a submodel (i.e., a Layer or a Block) should not be shared with other files. If it has to be, consider extracting it into ${MINDCLS}/models/layers as a common module, such as SEBlock.

+

Submodel

+

A deep model is a network composed of multiple layers. Some of these layers can be grouped into sub-models with the same topology, usually called a Layer or a Block, such as ResidualBlock. This kind of abstraction helps us understand the overall model structure and also makes the code easier to write.

+

Briefly describe what the sub-model does in its class docstring. In MindSpore, a model class inherits from nn.Cell. Generally speaking, we need to override the following two methods:

+
    +
  • In the __init__ method, define the neural network layers used in the model (the parameters of __init__ should be declared with type hints).
  • +
  • In the construct method, define the forward logic of the model.
  • +
+

Examples are as follows:

+
class MixerBlock(nn.Cell):
+    """Mixer Layer with token-mixing MLP and channel-mixing MLP"""
+
+    def __init__(self,
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 dropout: float = 0.
+                 ) -> None:
+        super().__init__()
+        self.token_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            TransPose((0, 2, 1)),
+            FeedForward(n_patches, token_dim, dropout),
+            TransPose((0, 2, 1))
+        )
+        self.channel_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            FeedForward(n_channels, channel_dim, dropout),
+        )
+
+    def construct(self, x):
+        x = x + self.token_mix(x)
+        x = x + self.channel_mix(x)
+        return x
+
+

When writing an nn.Cell subclass, two aspects are worth noting:

+
    +
  • +

    CellList & SequentialCell

    +
  • +
  • +

    CellList is simply a container that holds a list of neural network layers (Cell). The Cells it contains are properly registered and are visible to all Cell methods, but we must still write the forward computation ourselves, i.e., the construct method.

    +
  • +
  • +

    SequentialCell is a container that holds a sequential list of layers (Cell). The Cells may be named (OrderedDict) or unnamed (List). We do not need to implement the forward computation; it is performed in the order of the list.

    +
  • +
  • +

    construct

    +
  • +
  • +

    assert is not supported inside construct; using it raises [RuntimeError: ParseStatement] Unsupported statement 'Assert'.

    +
  • +
  • +

    Use of single operators. When calling an operator (such as concat, reshape, or mean), prefer the functional interface mindspore.ops.functional (e.g., output = ops.concat((x1, x2))) instead of instantiating the primitive operator ops.Primitive in __init__ (e.g., self.concat = ops.Concat()) and then calling it in construct (output = self.concat((x1, x2))). A minimal sketch of both containers and of the functional call style is given after this list.

    +
  • +
+
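To make the two container types and the functional operator style concrete, here is a minimal sketch (the class names and layer sizes are made up for illustration and are not part of MindCV):

import mindspore.nn as nn
import mindspore.ops as ops


class CellListBlock(nn.Cell):
    """With CellList the layers are registered, but construct must iterate them explicitly."""

    def __init__(self):
        super().__init__()
        self.layers = nn.CellList([nn.Dense(16, 32), nn.Dense(32, 64)])

    def construct(self, x):
        for layer in self.layers:
            x = layer(x)
        # functional operator call; no primitive is instantiated in __init__
        return ops.mean(x, 1)


class SequentialBlock(nn.Cell):
    """With SequentialCell the forward pass follows the list order automatically."""

    def __init__(self):
        super().__init__()
        self.layers = nn.SequentialCell([nn.Dense(16, 32), nn.Dense(32, 64)])

    def construct(self, x):
        return ops.mean(self.layers(x), 1)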

Master Model

+

The main model is the network architecture proposed in the paper, composed of multiple sub-models. It is the top-level network suitable for classification, detection, and other tasks. Writing it is largely similar to writing a submodel, with a few differences:

+
    +
  • Class docstring. Give the title and the link of the paper here. In addition, since this class is exposed to the outside world, it is good practice to also describe the class initialization parameters. See the code below.
  • +
  • forward_features method. Defines the computation of the feature network of the model.
  • +
  • forward_head method. Defines the computation of the classifier head of the model.
  • +
  • construct method. Calls the feature network and then the classifier head.
  • +
  • _initialize_weights method. By convention, random initialization of the model parameters is done in this member method. See the code below.
  • +
+

Examples are as follows:

+
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (Union[int, tuple]) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        in_channels(int): number the channels of the input. Default: 3.
+        n_classes (int) : number of classification classes. Default: 1000.
+    """
+
+    def __init__(self,
+                 depth: int,
+                 patch_size: Union[int, tuple],
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 in_channels: int = 3,
+                 n_classes: int = 1000,
+                 ) -> None:
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, n_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        return ops.mean(x, 1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Conv2d):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                if m.beta is not None:
+                    m.beta.set_data(init.initializer(init.Constant(0.0001), m.beta.shape))
+            elif isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+
+

Specification Function

+

The model proposed in the paper may come in different specifications, such as different channel widths or depths. The concrete configuration of each variant should be expressed through a specification function. The interface parameters pretrained, num_classes, and in_channels should be named uniformly across specification functions, and loading of pretrained weights should also be performed inside them. Each specification function corresponds to one variant: it passes that variant's configuration to the main model class through its arguments and returns the instantiated model. In addition, register each specification in the package by adding the @register_model decorator.

+

Examples are as follows:

+
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
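Once a specification function is registered with @register_model, the model can be created by name through the factory interface. A hedged usage sketch (it assumes the new model file has been added to the mindcv.models package and is imported there, so that registration actually runs):

import mindcv

# list registered MLP-Mixer variants
print(mindcv.list_models("mlp_mixer*"))
# instantiate the registered specification by name
network = mindcv.create_model("mlp_mixer_s_p16", num_classes=1000)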

Verify Main (Optional)

+

During the initial writing phase, make sure the model actually runs. The following code block can be used for a basic sanity check:

+
if __name__ == '__main__':
+    import numpy as np
+    import mindspore
+    from mindspore import Tensor
+
+    model = mlp_mixer_s_p16()
+    print(model)
+    dummy_input = Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)
+    y = model(dummy_input)
+    print(y.shape)
+
+

Reference Example

+
    +
  • densenet.py
  • +
  • shufflenetv1.py
  • +
  • shufflenetv2.py
  • +
  • mixnet.py
  • +
  • mlp_mixer.py
  • +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/index.html b/en/index.html new file mode 100644 index 000000000..08c25e03f --- /dev/null +++ b/en/index.html @@ -0,0 +1,1499 @@ + + + + + + + + + + + + + + + + + + + + + + Home - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +
+

MindCV

+

[Badges: CI | PyPI - Python Version | PyPI | docs | license | open issues | PRs | Code style: black | Imports: isort | pre-commit]

+
+

Introduction

+

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pre-trained weights and training strategies. SoTA methods such as auto augmentation are also provided for performance improvement. With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks.

+

Major Features

+
    +
  • +

    Easy-to-Use. MindCV decomposes the vision framework into various configurable components. It is easy to customize your data pipeline, models, and learning pipeline with MindCV:

    +
    >>> import mindcv
    +# create a dataset
    +>>> dataset = mindcv.create_dataset('cifar10', download=True)
    +# create a model
    +>>> network = mindcv.create_model('resnet50', pretrained=True)
    +
    +

    Users can customize and launch their transfer learning or training task in one command line.

    +
    # transfer learning in one command line
    +python train.py --model=swin_tiny --pretrained --opt=adamw --lr=0.001 --data_dir=/path/to/data
    +
    +
  • +
  • +

    State-of-the-Art. MindCV provides various CNN-based and Transformer-based vision models, including SwinTransformer. Their pretrained weights and performance reports are provided to help users select and reuse the right model:

    +
  • +
  • +

    Flexibility and efficiency. MindCV is built on MindSpore, an efficient DL framework that can run on different hardware platforms (GPU/CPU/Ascend). It supports both graph mode for high efficiency and pynative mode for flexibility.

    +
  • +
+

Model Zoo

+

The performance of the models trained with MindCV is summarized here, where the training recipes and weights are both available.

+

Model introduction and training details can be viewed in each sub-folder under configs.

+

Installation

+

See Installation for details.

+

Getting Started

+

Hands-on Tutorial

+

To get started with MindCV, please see the Quick Start, which will give you a quick tour of each key component and the train/validate/predict pipelines.

+

Below are a few code snippets to give you a taste.

+
>>> import mindcv
+# List and find a pretrained vision model
+>>> mindcv.list_models("swin*", pretrained=True)
+['swin_tiny']
+# Create the model object
+>>> network = mindcv.create_model('swin_tiny', pretrained=True)
+# Validate its accuracy
+>>> !python validate.py --model=swin_tiny --pretrained --dataset=imagenet --val_split=validation
+{'Top_1_Accuracy': 0.80824, 'Top_5_Accuracy': 0.94802, 'loss': 1.7331367141008378}
+
+
+Image Classification Demo +

Right click on the image below and save as dog.jpg.

+

+ +

+

Classify the downloaded image with a pretrained SoTA model:

+
>>> !python infer.py --model=swin_tiny --image_path='./dog.jpg'
+{'Labrador retriever': 0.5700152, 'golden retriever': 0.034551315, 'kelpie': 0.010108651, 'Chesapeake Bay retriever': 0.008229004, 'Walker hound, Walker foxhound': 0.007791956}
+
+

The top-1 prediction is labrador retriever, which is indeed the breed of this cute dog.

+
+

Training

+

It is easy to train your model on a standard or customized dataset using train.py, where the training strategy (e.g., augmentation, LR scheduling) can be configured with external arguments or a yaml config file.

+
    +
  • +

    Standalone Training

    +
    # standalone training
    +python train.py --model=resnet50 --dataset=cifar10 --dataset_download
    +
    +

    Above is an example of training ResNet50 on the CIFAR10 dataset on a CPU/GPU/Ascend device.

    +
  • +
  • +

    Distributed Training

    +

    For large datasets like ImageNet, it is necessary to do training in distributed mode on multiple devices. This can be achieved with mpirun and parallel features supported by MindSpore.

    +
    # distributed training
    +# assume you have 4 GPUs/NPUs
    +mpirun -n 4 python train.py --distribute \
    +    --model=densenet121 --dataset=imagenet --data_dir=/path/to/imagenet
    +
    +
    +

    Note: if the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

    +
    +

    Detailed parameter definitions can be found in config.py and checked by running `python train.py --help`.

    +

    To resume training, please set the --ckpt_path and --ckpt_save_dir arguments. The optimizer state, including the learning rate of the last stopped epoch, will also be recovered (a hedged example command is given after this list).

    +
  • +
  • +

    Config and Training Strategy

    +

    You can configure your model and other components either by specifying external parameters or by writing a yaml config file. Here is an example of training using a preset yaml file.

    +
    mpirun --allow-run-as-root -n 4 python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml
    +
    +
    +

    Pre-defined Training Strategies

    +

    We currently provide more than 20 training recipes that achieve SoTA results on ImageNet. Please look into the configs folder for details. Feel free to adapt these training strategies to your own model for performance improvement, which can easily be done by modifying the yaml file.

    +
    +
  • +
  • +

    Train on ModelArts/OpenI Platform

    +

    To run training on the ModelArts or OpenI cloud platform:

    +
    1. Create a new training task on the cloud platform.
    +2. Add the parameter `config` and specify the path to the yaml config file on the website UI interface.
    +3. Add the parameter `enable_modelarts` and set True on the website UI interface.
    +4. Fill in other blanks on the website and launch the training task.
    +
    +
  • +
+
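As mentioned under Distributed Training above, resuming an interrupted run only requires pointing --ckpt_path at the saved checkpoint and --ckpt_save_dir at the output directory. A hedged example, where the paths are placeholders:

# resume training from a saved checkpoint (paths are placeholders)
python train.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/imagenet \
    --ckpt_path=/path/to/model.ckpt --ckpt_save_dir=/path/to/save_dir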
+

Graph Mode and PyNative Mode

+

By default, the training pipeline train.py runs in graph mode on MindSpore, which is optimized for efficiency and parallel computing with a compiled static graph. In contrast, pynative mode is optimized for flexibility and easy debugging. You may alter the parameter --mode to switch to pure pynative mode for debugging purposes.

+
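For example, assuming --mode follows the MindSpore context convention (0 for graph mode, 1 for pynative mode; check config.py for the exact definition), a debugging run in pynative mode might look like:

# run the same training in pynative mode for easier debugging (the value mapping is an assumption)
python train.py --model=resnet50 --dataset=cifar10 --dataset_download --mode=1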
+
+

Mixed Mode

+

PyNative mode with mindspore.jit is a mixed mode that combines the flexibility and the efficiency of MindSpore. To apply pynative mode with mindspore.jit for training, please run train_with_func.py, e.g.,

+
python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download  --epoch_size=10
+
+
+

Note: this is an experimental function under improvement. It is not stable on MindSpore 1.8.1 or earlier versions.

+
+
+

Validation

+

To evaluate the model performance, please run validate.py

+
# validate a trained checkpoint
+python validate.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/data --ckpt_path=/path/to/model.ckpt
+
+
+

Validation while Training

+

You can also track the validation accuracy during training by enabling the --val_while_train option.

+
python train.py --model=resnet50 --dataset=cifar10 \
+    --val_while_train --val_split=test --val_interval=1
+
+

The training loss and validation accuracy for each epoch will be saved in ${ckpt_save_dir}/results.log.

+

More examples about training and validation can be seen in examples.

+
+

Tutorials

+

We provide the following Jupyter notebook tutorials to help users learn to use MindCV.

+ +

Supported Algorithms

+
+ Supported algorithms +
    +
  • Augmentation +
  • +
  • Optimizer
      +
    • Adam
    • +
    • AdamW
    • +
    • Lion
    • +
    • Adan (experimental)
    • +
    • AdaGrad
    • +
    • LAMB
    • +
    • Momentum
    • +
    • RMSProp
    • +
    • SGD
    • +
    • NAdam
    • +
    +
  • +
  • LR Scheduler
      +
    • Warmup Cosine Decay
    • +
    • Step LR
    • +
    • Polynomial Decay
    • +
    • Exponential Decay
    • +
    +
  • +
  • Regularization
      +
    • Weight Decay
    • +
    • Label Smoothing
    • +
    • Stochastic Depth (depends on networks)
    • +
    • Dropout (depends on networks)
    • +
    +
  • +
  • Loss
      +
    • Cross Entropy (w/ class weight and auxiliary logit support)
    • +
    • Binary Cross Entropy (w/ class weight and auxiliary logit support)
    • +
    • Soft Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • +
    • Soft Binary Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • +
    +
  • +
  • Ensemble
      +
    • Warmup EMA (Exponential Moving Average)
    • +
    +
  • +
+
+

How to Contribute

+

We appreciate all kinds of contributions including issues and PRs to make MindCV better.

+

Please refer to CONTRIBUTING for the contributing guidelines. Please follow the Model Template and Guideline for contributing a model that fits the overall interface :)

+

License

+

This project is released under the Apache License 2.0 open-source license.

+

Acknowledgement

+

MindCV is an open-source project jointly developed by the MindSpore team, Xidian University, and Xi'an Jiaotong University. +Sincere thanks to all participating researchers and developers for their hard work on this project. +We also acknowledge the computing resources provided by OpenI.

+

Citation

+

If you find this project useful in your research, please consider citing:

+
@misc{MindSpore Computer Vision 2022,
+    title={{MindSpore Computer Vision}: MindSpore Computer Vision Toolbox and Benchmark},
+    author={MindSpore Vision Contributors},
+    howpublished = {\url{https://github.com/mindspore-lab/mindcv/}},
+    year={2022}
+}
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/installation/index.html b/en/installation/index.html new file mode 100644 index 000000000..ad18765da --- /dev/null +++ b/en/installation/index.html @@ -0,0 +1,1211 @@ + + + + + + + + + + + + + + + + + + + + + + + + Installation - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Installation

+ +

Dependency

+
    +
  • mindspore >= 1.8.1
  • +
  • numpy >= 1.17.0
  • +
  • pyyaml >= 5.3
  • +
  • tqdm
  • +
  • openmpi 4.0.3 (for distributed mode)
  • +
+

To install the Python library dependencies, just run:

+
pip install -r requirements.txt
+
+
+

Tip

+

MindSpore can be easily installed by following the official instructions, where you can select the build that best fits your hardware platform. To run in distributed mode, OpenMPI is also required.

+
+

The following instructions assume that all required dependencies are installed.

+

Install with PyPI

+

MindCV is published as a Python package and can be installed with +pip, ideally by using a virtual environment. Open up a terminal and install +MindCV with:

+
+
+
+
pip install mindcv
+
+
+
+
# working on it using test.pypi
+
+
+
+
+

This will automatically install compatible versions of dependencies: +NumPy, PyYAML and tqdm.
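
A quick sanity check after installation; list_models and __version__ are assumed to be exposed at the package level, as in current MindCV releases:

import mindcv

print(mindcv.__version__)
# list a few registered model names to confirm the package is importable
print(mindcv.list_models("resnet*")[:5])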

+
+

Tip

+

If you don't have prior experience with Python, we recommend reading +Using Python's pip to Manage Your Projects' Dependencies, which is a really +good introduction to the mechanics of Python package management and helps you +troubleshoot if you run into errors.

+
+
+

Warning

+

The above command will NOT install MindSpore. +We highly recommend you install MindSpore following the official instructions.

+
+

Install from Source (Bleeding Edge Version)

+

from VCS

+
pip install git+https://github.com/mindspore-lab/mindcv.git
+
+

from local src

+
+

Tip

+

As this project is under active development, if you are a developer or contributor, please prefer this installation method!

+
+

MindCV can be used directly from GitHub by cloning the repository into a local folder, which might be useful if you want to use the very latest version:

+
git clone https://github.com/mindspore-lab/mindcv.git
+
+

After cloning from git, it is recommended that you install in "editable" mode, which helps resolve potential module import issues:

+
cd mindcv
+pip install -e .
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/modelzoo/index.html b/en/modelzoo/index.html new file mode 100644 index 000000000..16c1e0c89 --- /dev/null +++ b/en/modelzoo/index.html @@ -0,0 +1,2484 @@ + + + + + + + + + + + + + + + + + + + + + + + + Model Zoo - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Model Zoo

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Model | Context | Top-1 (%) | Top-5 (%) | Params (M) | Recipe | Download
BiT_resnet50 | D910x8-G | 76.81 | 93.17 | 25.55 | yaml | weights
BiT_resnet50x3 | D910x8-G | 80.63 | 95.12 | 217.31 | yaml | weights
BiT_resnet101 | D910x8-G | 77.93 | 93.75 | 44.54 | yaml | weights
coat_lite_tiny | D910x8-G | 77.35 | 93.43 | 5.72 | yaml | weights
coat_lite_mini | D910x8-G | 78.51 | 93.84 | 11.01 | yaml | weights
coat_tiny | D910x8-G | 79.67 | 94.88 | 5.50 | yaml | weights
coat_mini | D910x8-G | 81.08 | 95.34 | 10.34 | yaml | weights
convit_tiny | D910x8-G | 73.66 | 91.72 | 5.71 | yaml | weights
convit_tiny_plus | D910x8-G | 77.00 | 93.60 | 9.97 | yaml | weights
convit_small | D910x8-G | 81.63 | 95.59 | 27.78 | yaml | weights
convit_small_plus | D910x8-G | 81.80 | 95.42 | 48.98 | yaml | weights
convit_base | D910x8-G | 82.10 | 95.52 | 86.54 | yaml | weights
convit_base_plus | D910x8-G | 81.96 | 95.04 | 153.13 | yaml | weights
convnext_tiny | D910x64-G | 81.91 | 95.79 | 28.59 | yaml | weights
convnext_small | D910x64-G | 83.40 | 96.36 | 50.22 | yaml | weights
convnext_base | D910x64-G | 83.32 | 96.24 | 88.59 | yaml | weights
convnextv2_tiny | D910x8-G | 82.43 | 95.98 | 28.64 | yaml | weights
crossvit_9 | D910x8-G | 73.56 | 91.79 | 8.55 | yaml | weights
crossvit_15 | D910x8-G | 81.08 | 95.33 | 27.27 | yaml | weights
crossvit_18 | D910x8-G | 81.93 | 95.75 | 43.27 | yaml | weights
densenet121 | D910x8-G | 75.64 | 92.84 | 8.06 | yaml | weights
densenet161 | D910x8-G | 79.09 | 94.66 | 28.90 | yaml | weights
densenet169 | D910x8-G | 77.26 | 93.71 | 14.31 | yaml | weights
densenet201 | D910x8-G | 78.14 | 94.08 | 20.24 | yaml | weights
dpn92 | D910x8-G | 79.46 | 94.49 | 37.79 | yaml | weights
dpn98 | D910x8-G | 79.94 | 94.57 | 61.74 | yaml | weights
dpn107 | D910x8-G | 80.05 | 94.74 | 87.13 | yaml | weights
dpn131 | D910x8-G | 80.07 | 94.72 | 79.48 | yaml | weights
edgenext_xx_small | D910x8-G | 71.02 | 89.99 | 1.33 | yaml | weights
edgenext_x_small | D910x8-G | 75.14 | 92.50 | 2.34 | yaml | weights
edgenext_small | D910x8-G | 79.15 | 94.39 | 5.59 | yaml | weights
edgenext_base | D910x8-G | 82.24 | 95.94 | 18.51 | yaml | weights
efficientnet_b0 | D910x64-G | 76.89 | 93.16 | 5.33 | yaml | weights
efficientnet_b1 | D910x64-G | 78.95 | 94.34 | 7.86 | yaml | weights
ghostnet_050 | D910x8-G | 66.03 | 86.64 | 2.60 | yaml | weights
ghostnet_100 | D910x8-G | 73.78 | 91.66 | 5.20 | yaml | weights
ghostnet_130 | D910x8-G | 75.50 | 92.56 | 7.39 | yaml | weights
googlenet | D910x8-G | 72.68 | 90.89 | 6.99 | yaml | weights
hrnet_w32 | D910x8-G | 80.64 | 95.44 | 41.30 | yaml | weights
hrnet_w48 | D910x8-G | 81.19 | 95.69 | 77.57 | yaml | weights
inception_v3 | D910x8-G | 79.11 | 94.40 | 27.20 | yaml | weights
inception_v4 | D910x8-G | 80.88 | 95.34 | 42.74 | yaml | weights
mixnet_s | D910x8-G | 75.52 | 92.52 | 4.17 | yaml | weights
mixnet_m | D910x8-G | 76.64 | 93.05 | 5.06 | yaml | weights
mixnet_l | D910x8-G | 78.73 | 94.31 | 7.38 | yaml | weights
mnasnet_050 | D910x8-G | 68.07 | 88.09 | 2.14 | yaml | weights
mnasnet_075 | D910x8-G | 71.81 | 90.53 | 3.20 | yaml | weights
mnasnet_100 | D910x8-G | 74.28 | 91.70 | 4.42 | yaml | weights
mnasnet_130 | D910x8-G | 75.65 | 92.64 | 6.33 | yaml | weights
mnasnet_140 | D910x8-G | 76.01 | 92.83 | 7.16 | yaml | weights
mobilenet_v1_025 | D910x8-G | 53.87 | 77.66 | 0.47 | yaml | weights
mobilenet_v1_050 | D910x8-G | 65.94 | 86.51 | 1.34 | yaml | weights
mobilenet_v1_075 | D910x8-G | 70.44 | 89.49 | 2.60 | yaml | weights
mobilenet_v1_100 | D910x8-G | 72.95 | 91.01 | 4.25 | yaml | weights
mobilenet_v2_075 | D910x8-G | 69.98 | 89.32 | 2.66 | yaml | weights
mobilenet_v2_100 | D910x8-G | 72.27 | 90.72 | 3.54 | yaml | weights
mobilenet_v2_140 | D910x8-G | 75.56 | 92.56 | 6.15 | yaml | weights
mobilenet_v3_small_100 | D910x8-G | 68.10 | 87.86 | 2.55 | yaml | weights
mobilenet_v3_large_100 | D910x8-G | 75.23 | 92.31 | 5.51 | yaml | weights
mobilevit_xx_small | D910x8-G | 68.91 | 88.91 | 1.27 | yaml | weights
mobilevit_x_small | D910x8-G | 74.99 | 92.32 | 2.32 | yaml | weights
mobilevit_small | D910x8-G | 78.47 | 94.18 | 5.59 | yaml | weights
nasnet_a_4x1056 | D910x8-G | 73.65 | 91.25 | 5.33 | yaml | weights
pit_ti | D910x8-G | 72.96 | 91.33 | 4.85 | yaml | weights
pit_xs | D910x8-G | 78.41 | 94.06 | 10.61 | yaml | weights
pit_s | D910x8-G | 80.56 | 94.80 | 23.46 | yaml | weights
pit_b | D910x8-G | 81.87 | 95.04 | 73.76 | yaml | weights
poolformer_s12 | D910x8-G | 77.33 | 93.34 | 11.92 | yaml | weights
pvt_tiny | D910x8-G | 74.81 | 92.18 | 13.23 | yaml | weights
pvt_small | D910x8-G | 79.66 | 94.71 | 24.49 | yaml | weights
pvt_medium | D910x8-G | 81.82 | 95.81 | 44.21 | yaml | weights
pvt_large | D910x8-G | 81.75 | 95.70 | 61.36 | yaml | weights
pvt_v2_b0 | D910x8-G | 71.50 | 90.60 | 3.67 | yaml | weights
pvt_v2_b1 | D910x8-G | 78.91 | 94.49 | 14.01 | yaml | weights
pvt_v2_b2 | D910x8-G | 81.99 | 95.74 | 25.35 | yaml | weights
pvt_v2_b3 | D910x8-G | 82.84 | 96.24 | 45.24 | yaml | weights
pvt_v2_b4 | D910x8-G | 83.14 | 96.27 | 62.56 | yaml | weights
regnet_x_200mf | D910x8-G | 68.74 | 88.38 | 2.68 | yaml | weights
regnet_x_400mf | D910x8-G | 73.16 | 91.35 | 5.16 | yaml | weights
regnet_x_600mf | D910x8-G | 74.34 | 92.00 | 6.20 | yaml | weights
regnet_x_800mf | D910x8-G | 76.04 | 92.97 | 7.26 | yaml | weights
regnet_y_200mf | D910x8-G | 70.30 | 89.61 | 3.16 | yaml | weights
regnet_y_400mf | D910x8-G | 73.91 | 91.84 | 4.34 | yaml | weights
regnet_y_600mf | D910x8-G | 75.69 | 92.50 | 6.06 | yaml | weights
regnet_y_800mf | D910x8-G | 76.52 | 93.10 | 6.26 | yaml | weights
regnet_y_16gf | D910x8-G | 82.92 | 96.29 | 83.71 | yaml | weights
repmlp_t224 | D910x8-G | 76.71 | 93.30 | 38.30 | yaml | weights
repvgg_a0 | D910x8-G | 72.19 | 90.75 | 9.13 | yaml | weights
repvgg_a1 | D910x8-G | 74.19 | 91.89 | 14.12 | yaml | weights
repvgg_a2 | D910x8-G | 76.63 | 93.42 | 28.25 | yaml | weights
repvgg_b0 | D910x8-G | 74.99 | 92.40 | 15.85 | yaml | weights
repvgg_b1 | D910x8-G | 78.81 | 94.37 | 57.48 | yaml | weights
repvgg_b2 | D910x64-G | 79.29 | 94.66 | 89.11 | yaml | weights
repvgg_b3 | D910x64-G | 80.46 | 95.34 | 123.19 | yaml | weights
repvgg_b1g2 | D910x8-G | 78.03 | 94.09 | 45.85 | yaml | weights
repvgg_b1g4 | D910x8-G | 77.64 | 94.03 | 40.03 | yaml | weights
repvgg_b2g4 | D910x8-G | 78.8 | 94.36 | 61.84 | yaml | weights
res2net50 | D910x8-G | 79.35 | 94.64 | 25.76 | yaml | weights
res2net101 | D910x8-G | 79.56 | 94.70 | 45.33 | yaml | weights
res2net50_v1b | D910x8-G | 80.32 | 95.09 | 25.77 | yaml | weights
res2net101_v1b | D910x8-G | 81.14 | 95.41 | 45.35 | yaml | weights
resnest50 | D910x8-G | 80.81 | 95.16 | 27.55 | yaml | weights
resnest101 | D910x8-G | 82.90 | 96.12 | 48.41 | yaml | weights
resnet18 | D910x8-G | 70.21 | 89.62 | 11.70 | yaml | weights
resnet34 | D910x8-G | 74.15 | 91.98 | 21.81 | yaml | weights
resnet50 | D910x8-G | 76.69 | 93.50 | 25.61 | yaml | weights
resnet101 | D910x8-G | 78.24 | 94.09 | 44.65 | yaml | weights
resnet152 | D910x8-G | 78.72 | 94.45 | 60.34 | yaml | weights
resnetv2_50 | D910x8-G | 76.90 | 93.37 | 25.60 | yaml | weights
resnetv2_101 | D910x8-G | 78.48 | 94.23 | 44.55 | yaml | weights
resnext50_32x4d | D910x8-G | 78.53 | 94.10 | 25.10 | yaml | weights
resnext101_32x4d | D910x8-G | 79.83 | 94.80 | 44.32 | yaml | weights
resnext101_64x4d | D910x8-G | 80.30 | 94.82 | 83.66 | yaml | weights
resnext152_64x4d | D910x8-G | 80.52 | 95.00 | 115.27 | yaml | weights
rexnet_09 | D910x8-G | 77.06 | 93.41 | 4.13 | yaml | weights
rexnet_10 | D910x8-G | 77.38 | 93.60 | 4.84 | yaml | weights
rexnet_13 | D910x8-G | 79.06 | 94.28 | 7.61 | yaml | weights
rexnet_15 | D910x8-G | 79.95 | 94.74 | 9.79 | yaml | weights
rexnet_20 | D910x8-G | 80.64 | 94.99 | 16.45 | yaml | weights
seresnet18 | D910x8-G | 71.81 | 90.49 | 11.80 | yaml | weights
seresnet34 | D910x8-G | 75.38 | 92.50 | 21.98 | yaml | weights
seresnet50 | D910x8-G | 78.32 | 94.07 | 28.14 | yaml | weights
seresnext26_32x4d | D910x8-G | 77.17 | 93.42 | 16.83 | yaml | weights
seresnext50_32x4d | D910x8-G | 78.71 | 94.36 | 27.63 | yaml | weights
shufflenet_v1_g3_05 | D910x8-G | 57.05 | 79.73 | 0.73 | yaml | weights
shufflenet_v1_g3_10 | D910x8-G | 67.77 | 87.73 | 1.89 | yaml | weights
shufflenet_v2_x0_5 | D910x8-G | 60.53 | 82.11 | 1.37 | yaml | weights
shufflenet_v2_x1_0 | D910x8-G | 69.47 | 88.88 | 2.29 | yaml | weights
shufflenet_v2_x1_5 | D910x8-G | 72.79 | 90.93 | 3.53 | yaml | weights
shufflenet_v2_x2_0 | D910x8-G | 75.07 | 92.08 | 7.44 | yaml | weights
skresnet18 | D910x8-G | 73.09 | 91.20 | 11.97 | yaml | weights
skresnet34 | D910x8-G | 76.71 | 93.10 | 22.31 | yaml | weights
skresnext50_32x4d | D910x8-G | 79.08 | 94.60 | 37.31 | yaml | weights
squeezenet1_0 | D910x8-G | 59.01 | 81.01 | 1.25 | yaml | weights
squeezenet1_0 | GPUx8-G | 58.83 | 81.08 | 1.25 | yaml | weights
squeezenet1_1 | D910x8-G | 58.44 | 80.84 | 1.24 | yaml | weights
squeezenet1_1 | GPUx8-G | 59.18 | 81.41 | 1.24 | yaml | weights
swin_tiny | D910x8-G | 80.82 | 94.80 | 33.38 | yaml | weights
swinv2_tiny_window8 | D910x8-G | 81.42 | 95.43 | 28.78 | yaml | weights
vgg11 | D910x8-G | 71.86 | 90.50 | 132.86 | yaml | weights
vgg13 | D910x8-G | 72.87 | 91.02 | 133.04 | yaml | weights
vgg16 | D910x8-G | 74.61 | 91.87 | 138.35 | yaml | weights
vgg19 | D910x8-G | 75.21 | 92.56 | 143.66 | yaml | weights
visformer_tiny | D910x8-G | 78.28 | 94.15 | 10.33 | yaml | weights
visformer_tiny_v2 | D910x8-G | 78.82 | 94.41 | 9.38 | yaml | weights
visformer_small | D910x8-G | 81.76 | 95.88 | 40.25 | yaml | weights
visformer_small_v2 | D910x8-G | 82.17 | 95.90 | 23.52 | yaml | weights
vit_b_32_224 | D910x8-G | 75.86 | 92.08 | 87.46 | yaml | weights
vit_l_16_224 | D910x8-G | 76.34 | 92.79 | 303.31 | yaml | weights
vit_l_32_224 | D910x8-G | 73.71 | 90.92 | 305.52 | yaml | weights
volo_d1 | D910x8-G | 82.59 | 95.99 | 27 | yaml | weights
xception | D910x8-G | 79.01 | 94.25 | 22.91 | yaml | weights
xcit_tiny_12_p16_224 | D910x8-G | 77.67 | 93.79 | 7.00 | yaml | weights
+

Notes

+
    +
  • Context: Training context, denoted as {device}x{pieces}-{MS mode}, where the MindSpore mode can be G (graph mode) or F (pynative mode with ms function). For example, D910x8-G means training on 8 Ascend 910 NPUs in graph mode.
  • +
  • Top-1 and Top-5: Accuracy reported on the validation set of ImageNet-1K.
  • +
+ + + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/notes/changelog/index.html b/en/notes/changelog/index.html new file mode 100644 index 000000000..d4d0635d3 --- /dev/null +++ b/en/notes/changelog/index.html @@ -0,0 +1,1034 @@ + + + + + + + + + + + + + + + + + + + + + + + + Change Log - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Change Log

+

Coming soon.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/notes/code_of_conduct/index.html b/en/notes/code_of_conduct/index.html new file mode 100644 index 000000000..3995920e3 --- /dev/null +++ b/en/notes/code_of_conduct/index.html @@ -0,0 +1,1034 @@ + + + + + + + + + + + + + + + + + + + + + + + + Code of Conduct - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Code of Conduct

+

Coming soon.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/notes/contributing/index.html b/en/notes/contributing/index.html new file mode 100644 index 000000000..c8eb58ecd --- /dev/null +++ b/en/notes/contributing/index.html @@ -0,0 +1,1337 @@ + + + + + + + + + + + + + + + + + + + + + + + + Contributing - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + +

MindCV Contributing Guidelines

+

Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given.

+

Contributor License Agreement

+

It's required to sign CLA before your first code submission to MindCV community.

+

For individual contributor, please refer to ICLA online document for the detailed information.

+

Types of Contributions

+

Report Bugs

+

Report bugs at https://github.com/mindspore-lab/mindcv/issues.

+

If you are reporting a bug, please include:

+
    +
  • Your operating system name and version.
  • +
  • Any details about your local setup that might be helpful in troubleshooting.
  • +
  • Detailed steps to reproduce the bug.
  • +
+

Fix Bugs

+

Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it.

+

Implement Features

+

Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it.

+

Write Documentation

+

MindCV could always use more documentation, whether as part of the +official MindCV docs, in docstrings, or even on the web in blog posts, +articles, and such.

+

Submit Feedback

+

The best way to send feedback is to file an issue at https://github.com/mindspore-lab/mindcv/issues.

+

If you are proposing a feature:

+
    +
  • Explain in detail how it would work.
  • +
  • Keep the scope as narrow as possible, to make it easier to implement.
  • +
  • Remember that this is a volunteer-driven project, and that contributions are welcome :)
  • +
+

Getting Started

+

Ready to contribute? Here's how to set up mindcv for local development.

+
    +
  1. Fork the mindcv repo on GitHub.
  2. +
  3. Clone your fork locally:
  4. +
+
git clone git@github.com:your_name_here/mindcv.git
+
+

After that, you should add official repository as the upstream repository:

+
git remote add upstream git@github.com:mindspore-lab/mindcv
+
+
    +
  1. Install your local copy into a conda environment. Assuming you have conda installed, this is how you set up your fork for local development:
  2. +
+
conda create -n mindcv python=3.8
+conda activate mindcv
+cd mindcv
+pip install -e .
+
+
    +
  1. Create a branch for local development:
  2. +
+
git checkout -b name-of-your-bugfix-or-feature
+
+

Now you can make your changes locally.

+
    +
  1. When you're done making changes, check that your changes pass the linters and the tests:
  2. +
+
pre-commit run --show-diff-on-failure --color=always --all-files
+pytest
+
+

If all static linting are passed, you will get output like:

+

pre-commit-succeed

+

otherwise, you need to fix the warnings according to the output:

+

pre-commit-failed

+

To get pre-commit and pytest, just pip install them into your conda environment.

+
    +
  1. Commit your changes and push your branch to GitHub:
  2. +
+
git add .
+git commit -m "Your detailed description of your changes."
+git push origin name-of-your-bugfix-or-feature
+
+
    +
  1. Submit a pull request through the GitHub website.
  2. +
+

Pull Request Guidelines

+

Before you submit a pull request, check that it meets these guidelines:

+
    +
  1. The pull request should include tests.
  2. +
  3. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.md.
  4. +
  5. The pull request should work for Python 3.7, 3.8 and 3.9, and for PyPy. Check + https://github.com/mindspore-lab/mindcv/actions + and make sure that the tests pass for all supported Python versions.
  6. +
+

Tips

+

You can install the git hook scripts instead of linting with pre-commit run -a manually.

+

run flowing command to set up the git hook scripts

+
pre-commit install
+
+

now pre-commit will run automatically on git commit!

+

Releasing

+

A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.md). +Then run:

+
bump2version patch # possible: major / minor / patch
+git push
+git push --tags
+
+

GitHub Action will then deploy to PyPI if tests pass.

+ + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/notes/faq/index.html b/en/notes/faq/index.html new file mode 100644 index 000000000..ff281414f --- /dev/null +++ b/en/notes/faq/index.html @@ -0,0 +1,1017 @@ + + + + + + + + + + + + + + + + + + + + + + FAQ - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+ +
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/reference/data/index.html b/en/reference/data/index.html new file mode 100644 index 000000000..e768d4824 --- /dev/null +++ b/en/reference/data/index.html @@ -0,0 +1,4180 @@ + + + + + + + + + + + + + + + + + + + + + + + + data - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Data

+

Auto Augmentation

+ + + +
+ + + +

+mindcv.data.auto_augment.auto_augment_transform(configs, hparams) + +

+ + +
+ +

Create a AutoAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the automatic augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). The first part defines +the AutoAugment policy ('autoaug', 'autoaugr' or '3a': +'autoaug' for the original AutoAugment policy with PosterizeOriginal, +'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation, + '3a' for the AutoAugment only with 3 augmentations.) +There is no order requirement for the remaining config parts.

+
    +
  • mstd: Float standard deviation of applied magnitude noise.
  • +
+

Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy +and magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams of the automatic augmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
def auto_augment_transform(configs, hparams):
+    """
+    Create a AutoAugment transform
+    Args:
+        configs: A string that defines the automatic augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-"). The first part defines
+            the AutoAugment policy ('autoaug', 'autoaugr' or '3a':
+            'autoaug' for the original AutoAugment policy with PosterizeOriginal,
+            'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation,
+             '3a' for the AutoAugment only with 3 augmentations.)
+            There is no order requirement for the remaining config parts.
+
+            - mstd: Float standard deviation of applied magnitude noise.
+
+            Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy
+            and magnitude_std 0.5.
+        hparams: Other hparams of the automatic augmentation scheme.
+    """
+    config = configs.split("-")
+    policy_name = config[0]
+    config = config[1:]
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param injected via hparams for now
+            hparams.setdefault("magnitude_std", float(val))
+        else:
+            assert False, "Unknown AutoAugment config section"
+    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
+    return AutoAugment(aa_policy)
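
A usage sketch, built from the config-string format documented above. Passing an empty hparams dict relies on the module's internal defaults, which is an assumption; supply your own hyper-parameters if your pipeline needs them. The returned object acts as a per-image transform that can be placed in a transform list:

from mindcv.data.auto_augment import auto_augment_transform

# original AutoAugment policy with magnitude noise std 0.5 ('autoaug-mstd0.5')
aa = auto_augment_transform("autoaug-mstd0.5", hparams={})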
+
+
+
+ +
+ + +
+ + + +

+mindcv.data.auto_augment.rand_augment_transform(configs, hparams) + +

+ + +
+ +

Create a RandAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the random augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). +The first part defines the AutoAugment policy ('randaug' policy). +There is no order requirement for the remaining config parts.

+
    +
  • m: Integer magnitude of rand augment. Default: 10
  • +
  • n: Integer num layer (number of transform operations selected for each image). Default: 2
  • +
  • w: Integer probability weight index (the index that affects a group of weights selected by operations).
  • +
  • mstd: Floating standard deviation of applied magnitude noise, + or uniform sampling at infinity (or greater than 100).
  • +
  • mmax: Set the upper range limit for magnitude to a value + other than the default value of _LEVEL_DENOM (10).
  • +
  • inc: Integer (bool), using the severity increase with magnitude (default: 0).
  • +
+

Example: 'randaug-w0-n3-mstd0.5' will be random augment + using the weights 0, num_layers 3, magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams (kwargs) for the RandAugmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
def rand_augment_transform(configs, hparams):
+    """
+    Create a RandAugment transform
+    Args:
+        configs: A string that defines the random augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment policy ('randaug' policy).
+            There is no order requirement for the remaining config parts.
+
+            - m: Integer magnitude of rand augment. Default: 10
+            - n: Integer num layer (number of transform operations selected for each image). Default: 2
+            - w: Integer probability weight index (the index that affects a group of weights selected by operations).
+            - mstd: Floating standard deviation of applied magnitude noise,
+                or uniform sampling at infinity (or greater than 100).
+            - mmax: Set the upper range limit for magnitude to a value
+                other than the default value of _LEVEL_DENOM (10).
+            - inc: Integer (bool), using the severity increase with magnitude (default: 0).
+
+            Example: 'randaug-w0-n3-mstd0.5' will be random augment
+                using the weights 0, num_layers 3, magnitude_std 0.5.
+        hparams: Other hparams (kwargs) for the RandAugmentation scheme.
+    """
+    magnitude = _LEVEL_DENOM  # default to _LEVEL_DENOM for magnitude (currently 10)
+    num_layers = 2  # default to 2 ops per image
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    weight_idx = None  # default to no probability weights for op choice
+    transforms = _RAND_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "randaug"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param / randomization of magnitude values
+            mstd = float(val)
+            if mstd > 100:
+                # use uniform sampling in 0 to magnitude if mstd is > 100
+                mstd = float("inf")
+            hparams.setdefault("magnitude_std", mstd)
+        elif key == "mmax":
+            # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]
+            hparams.setdefault("magnitude_max", int(val))
+        elif key == "inc":
+            if bool(val):
+                transforms = _RAND_INCREASING_TRANSFORMS
+        elif key == "m":
+            magnitude = int(val)
+        elif key == "n":
+            num_layers = int(val)
+        elif key == "w":
+            weight_idx = int(val)
+        else:
+            assert False, "Unknown RandAugment config section"
+    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
+    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
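
A usage sketch with the example config string from the docstring; hparams is again left to internal defaults, which is an assumption:

from mindcv.data.auto_augment import rand_augment_transform

# probability-weight index 0, 3 ops per image, magnitude noise std 0.5
ra = rand_augment_transform("randaug-w0-n3-mstd0.5", hparams={})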
+
+
+
+ +
+ + +
+ + + +

+mindcv.data.auto_augment.trivial_augment_wide_transform(configs, hparams) + +

+ + +
+ +

Create a TrivialAugmentWide transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the TrivialAugmentWide configuration. +It is composed of multiple parts separated by dashes ("-"). +The first part defines the AutoAugment name, it should be 'trivialaugwide'. +the second part(not necessary) the maximum value of magnitude.

+
    +
  • m: final magnitude of a operation will uniform sampling from [0, m] . Default: 31
  • +
+

Example: 'trivialaugwide-m20' will be TrivialAugmentWide +with mgnitude uniform sampling from [0, 20],

+
+

+

+
hparams +
+

Other hparams (kwargs) for the TrivialAugment scheme.

+
+

+

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
def trivial_augment_wide_transform(configs, hparams):
+    """
+    Create a TrivialAugmentWide transform
+    Args:
+        configs: A string that defines the TrivialAugmentWide configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment name, it should be 'trivialaugwide'.
+            the second part(not necessary) the maximum value of magnitude.
+
+            - m: final magnitude of a operation will uniform sampling from [0, m] . Default: 31
+
+            Example: 'trivialaugwide-m20' will be TrivialAugmentWide
+            with mgnitude uniform sampling from [0, 20],
+        hparams: Other hparams (kwargs) for the TrivialAugment scheme.
+    Returns:
+        A Mindspore compatible Transform
+    """
+    magnitude = 31
+    transforms = _TRIVIALAUGMENT_WIDE_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "trivialaugwide"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        else:
+            assert False, "Unknown TrivialAugmentWide config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_max"] = magnitude
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling
+    hparams["trivialaugwide"] = True
+    ta_ops = trivial_augment_wide_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    return TrivialAugmentWide(ta_ops)
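
A usage sketch mirroring the docstring example; 'trivialaugwide-m20' caps the uniformly sampled magnitude at 20, and an empty hparams dict is accepted because the function creates one internally:

from mindcv.data.auto_augment import trivial_augment_wide_transform

taw = trivial_augment_wide_transform("trivialaugwide-m20", hparams={})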
+
+
+
+ +
+ + +
+ + + +

+mindcv.data.auto_augment.augment_and_mix_transform(configs, hparams=None) + +

+ + +
+ +

Create AugMix PyTorch transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

String defining configuration of AugMix augmentation. Consists of multiple sections separated +by dashes ('-'). The first section defines the specific name of augment, it should be 'augmix'. +The remaining sections, not order sepecific determine + 'm' - integer magnitude (severity) of augmentation mix (default: 3) + 'w' - integer width of augmentation chain (default: 3) + 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) + 'a' - integer or float, the args of beta deviation of beta for generate the weight, default 1.. +Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2

+
+

+ + TYPE: + str + +

+
hparams +
+

Other hparams (kwargs) for the Augmentation transforms

+
+

+ + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
def augment_and_mix_transform(configs, hparams=None):
+    """Create AugMix PyTorch transform
+
+    Args:
+        configs (str): String defining configuration of AugMix augmentation. Consists of multiple sections separated
+            by dashes ('-'). The first section defines the specific name of augment, it should be 'augmix'.
+            The remaining sections, not order sepecific determine
+                'm' - integer magnitude (severity) of augmentation mix (default: 3)
+                'w' - integer width of augmentation chain (default: 3)
+                'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
+                'a' - integer or float, the args of beta deviation of beta for generate the weight, default 1..
+            Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2
+
+        hparams: Other hparams (kwargs) for the Augmentation transforms
+
+    Returns:
+         A Mindspore compatible Transform
+    """
+    magnitude = 3
+    width = 3
+    depth = -1
+    alpha = 1.0
+    config = configs.split("-")
+    assert config[0] == "augmix"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        elif key == "w":
+            width = int(val)
+        elif key == "d":
+            depth = int(val)
+        elif key == "a":
+            alpha = float(val)
+        else:
+            assert False, "Unknown AugMix config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling (if not set via mstd arg)
+    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
+    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth)
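
A usage sketch based on the 'augmix-m5-w4-d2' example in the docstring (severity 5, chain width 4, chain depth 2); hparams defaults to None, so it can be omitted:

from mindcv.data.auto_augment import augment_and_mix_transform

am = augment_and_mix_transform("augmix-m5-w4-d2")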
+
+
+
+ +

Dataset Factory

+ + + +
+ + + +

+mindcv.data.dataset_factory.create_dataset(name='', root=None, split='train', shuffle=True, num_samples=None, num_shards=None, shard_id=None, num_parallel_workers=None, download=False, num_aug_repeats=0, **kwargs) + +

+ + +
+ +

Creates dataset by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

dataset name like MNIST, CIFAR10, ImageNeT, ''. '' means a customized dataset. Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
root +
+

dataset root dir. Default: None.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
split +
+

data split: '' or split name string (train/val/test), if it is '', no split is used. +Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'train' + +

+
shuffle +
+

whether to shuffle the dataset. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
num_samples +
+

Number of elements to sample (default=None, which means sample all elements).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_shards +
+

Number of shards that the dataset will be divided into (default=None). +When this argument is specified, num_samples reflects the maximum sample number of per shard.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
shard_id +
+

The shard ID within num_shards (default=None). +This argument can only be specified when num_shards is also specified.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers to read the data (default=None, set in the config).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
download +
+

whether to download the dataset. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_aug_repeats +
+

Number of dataset repetition for repeated augmentation. +If 0 or 1, repeated augmentation is disabled. +Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
+ +
+ Note +

For custom datasets and imagenet, the dataset dir should follow the structure like: +.dataset_name/ +├── split1/ +│ ├── class1/ +│ │ ├── 000001.jpg +│ │ ├── 000002.jpg +│ │ └── .... +│ └── class2/ +│ ├── 000001.jpg +│ ├── 000002.jpg +│ └── .... +└── split2/ + ├── class1/ + │ ├── 000001.jpg + │ ├── 000002.jpg + │ └── .... + └── class2/ + ├── 000001.jpg + ├── 000002.jpg + └── ....

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Dataset object

+
+
+ +
+ Source code in mindcv/data/dataset_factory.py +
 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
def create_dataset(
+    name: str = "",
+    root: Optional[str] = None,
+    split: str = "train",
+    shuffle: bool = True,
+    num_samples: Optional[int] = None,
+    num_shards: Optional[int] = None,
+    shard_id: Optional[int] = None,
+    num_parallel_workers: Optional[int] = None,
+    download: bool = False,
+    num_aug_repeats: int = 0,
+    **kwargs,
+):
+    r"""Creates dataset by name.
+
+    Args:
+        name: dataset name like MNIST, CIFAR10, ImageNeT, ''. '' means a customized dataset. Default: ''.
+        root: dataset root dir. Default: None.
+        split: data split: '' or split name string (train/val/test), if it is '', no split is used.
+            Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.
+        shuffle: whether to shuffle the dataset. Default: True.
+        num_samples: Number of elements to sample (default=None, which means sample all elements).
+        num_shards: Number of shards that the dataset will be divided into (default=None).
+            When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+        shard_id: The shard ID within `num_shards` (default=None).
+            This argument can only be specified when `num_shards` is also specified.
+        num_parallel_workers: Number of workers to read the data (default=None, set in the config).
+        download: whether to download the dataset. Default: False
+        num_aug_repeats: Number of dataset repetition for repeated augmentation.
+            If 0 or 1, repeated augmentation is disabled.
+            Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)
+
+    Note:
+        For custom datasets and imagenet, the dataset dir should follow the structure like:
+        .dataset_name/
+        ├── split1/
+        │  ├── class1/
+        │  │   ├── 000001.jpg
+        │  │   ├── 000002.jpg
+        │  │   └── ....
+        │  └── class2/
+        │      ├── 000001.jpg
+        │      ├── 000002.jpg
+        │      └── ....
+        └── split2/
+           ├── class1/
+           │   ├── 000001.jpg
+           │   ├── 000002.jpg
+           │   └── ....
+           └── class2/
+               ├── 000001.jpg
+               ├── 000002.jpg
+               └── ....
+
+    Returns:
+        Dataset object
+    """
+    name = name.lower()
+    if root is None:
+        root = os.path.join(get_dataset_download_root(), name)
+
+    assert (num_samples is None) or (num_aug_repeats == 0), "num_samples and num_aug_repeats can NOT be set together."
+
+    # subset sampling
+    if num_samples is not None and num_samples > 0:
+        # TODO: rewrite ordered distributed sampler (subset sampling in distributed mode is not tested)
+        if num_shards is not None and num_shards > 1:  # distributed
+            _logger.info(f"number of shards: {num_shards}, number of samples: {num_samples}")
+            sampler = DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
+        else:  # standalone
+            if shuffle:
+                sampler = ds.RandomSampler(replacement=False, num_samples=num_samples)
+            else:
+                sampler = ds.SequentialSampler(num_samples=num_samples)
+        mindspore_kwargs = dict(
+            shuffle=None,
+            sampler=sampler,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+    else:
+        sampler = None
+        mindspore_kwargs = dict(
+            shuffle=shuffle,
+            sampler=sampler,
+            num_shards=num_shards,
+            shard_id=shard_id,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+
+    # sampler for repeated augmentation
+    if num_aug_repeats > 0:
+        dataset_size = get_dataset_size(name, root, split)
+        _logger.info(
+            f"Repeated augmentation is enabled, num_aug_repeats: {num_aug_repeats}, "
+            f"original dataset size: {dataset_size}."
+        )
+        # since drop_remainder is usually True, we don't need to do rounding in sampling
+        sampler = RepeatAugSampler(
+            dataset_size,
+            num_shards=num_shards,
+            rank_id=shard_id,
+            num_repeats=num_aug_repeats,
+            selected_round=0,
+            shuffle=shuffle,
+        )
+        mindspore_kwargs = dict(shuffle=None, sampler=sampler, num_shards=None, shard_id=None, **kwargs)
+
+    # create dataset
+    if name in _MINDSPORE_BASIC_DATASET:
+        dataset_class = _MINDSPORE_BASIC_DATASET[name][0]
+        dataset_download = _MINDSPORE_BASIC_DATASET[name][1]
+        dataset_new_path = None
+        if download:
+            if shard_id is not None:
+                root = os.path.join(root, f"dataset_{str(shard_id)}")
+            dataset_download = dataset_download(root)
+            dataset_download.download()
+            dataset_new_path = dataset_download.path
+
+        dataset = dataset_class(
+            dataset_dir=dataset_new_path if dataset_new_path else root,
+            usage=split,
+            **mindspore_kwargs,
+        )
+        # address ms dataset num_classes empty issue
+        if name == "mnist":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar10":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar100":
+            dataset.num_classes = lambda: 100
+
+    else:
+        if name == "imagenet" and download:
+            raise ValueError(
+                "Imagenet dataset download is not supported. "
+                "Please download imagenet from https://www.image-net.org/download.php, "
+                "and parse the path of dateset directory via args.data_dir."
+            )
+
+        if os.path.isdir(root):
+            root = os.path.join(root, split)
+        dataset = ImageFolderDataset(dataset_dir=root, **mindspore_kwargs)
+        """ Another implementation which a bit slower than ImageFolderDataset
+            imagenet_dataset = ImageNetDataset(dataset_dir=root)
+            sampler = RepeatAugSampler(len(imagenet_dataset), num_shards=num_shards, rank_id=shard_id,
+                                       num_repeats=repeated_aug, selected_round=1, shuffle=shuffle)
+            dataset = ds.GeneratorDataset(imagenet_dataset, column_names=imagenet_dataset.column_names, sampler=sampler)
+        """
+    return dataset
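
A usage sketch for an ImageNet-style folder layout; the path is a placeholder, and the distributed arguments are shown for reference only:

from mindcv.data import create_dataset

# standalone
train_set = create_dataset(name="imagenet", root="/path/to/imagenet", split="train", shuffle=True)
print(train_set.get_dataset_size())

# distributed: shard the data across 8 devices, this process reading shard 0
train_set_dist = create_dataset(
    name="imagenet",
    root="/path/to/imagenet",
    split="train",
    shuffle=True,
    num_shards=8,
    shard_id=0,
)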
+
+
+
+ +

Sampler

+ + +
+ + + +

+ mindcv.data.distributed_sampler.RepeatAugSampler + + +

+ + +
+ + +

Sampler that restricts data loading to a subset of the dataset for distributed, +with repeated augmentation. +It ensures that different each augmented version of a sample will be visible to a +different process.

+

This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_size +
+

dataset size.

+
+

+

+
num_shards +
+

num devices.

+
+

+ + DEFAULT: + None + +

+
rank_id +
+

device id.

+
+

+ + DEFAULT: + None + +

+
shuffle(bool) +
+

True for using shuffle, False for not using.

+
+

+

+
num_repeats(int) +
+

num of repeated instances in repeated augmentation, Default:3.

+
+

+

+
selected_round(int) +
+

round the total num of samples by this factor, Defailt:256.

+
+

+

+
+ +
+ Source code in mindcv/data/distributed_sampler.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
class RepeatAugSampler:
+    """Sampler that restricts data loading to a subset of the dataset for distributed,
+    with repeated augmentation.
+    It ensures that different each augmented version of a sample will be visible to a
+    different process.
+
+    This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py
+
+    Args:
+        dataset_size: dataset size.
+        num_shards: num devices.
+        rank_id: device id.
+        shuffle(bool): True for using shuffle, False for not using.
+        num_repeats(int): num of repeated instances in repeated augmentation, Default:3.
+        selected_round(int): round the total num of samples by this factor, Defailt:256.
+    """
+
+    def __init__(
+        self,
+        dataset_size,
+        num_shards=None,
+        rank_id=None,
+        shuffle=True,
+        num_repeats=3,
+        selected_round=256,
+    ):
+        if num_shards is None:
+            _logger.warning("num_shards is set to 1 in RepeatAugSampler since it is not passed in")
+            num_shards = 1
+        if rank_id is None:
+            rank_id = 0
+
+        # assert isinstance(num_repeats, int), f'num_repeats should be Type integer, but got {type(num_repeats)}'
+
+        self.dataset_size = dataset_size
+        self.num_shards = num_shards
+        self.rank_id = rank_id
+        self.shuffle = shuffle
+        self.num_repeats = int(num_repeats)
+        self.epoch = 0
+        self.num_samples = int(math.ceil(self.dataset_size * num_repeats / self.num_shards))
+        self.total_size = self.num_samples * self.num_shards
+        # Determine the number of samples to select per epoch for each rank.
+        if selected_round:
+            self.num_selected_samples = int(
+                math.floor(self.dataset_size // selected_round * selected_round / num_shards)
+            )
+        else:
+            self.num_selected_samples = int(math.ceil(self.dataset_size / num_shards))
+
+    def __iter__(self):
+        # deterministically shuffle based on epoch
+        # print('__iter__  generating new shuffled indices: ', self.epoch)
+        if self.shuffle:
+            indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size)
+            indices = indices.tolist()
+            self.epoch += 1
+            # print(indices[:30])
+        else:
+            indices = list(range(self.dataset_size))
+        # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]
+        indices = [ele for ele in indices for i in range(self.num_repeats)]
+
+        # add extra samples to make it evenly divisible
+        padding_size = self.total_size - len(indices)
+        if padding_size > 0:
+            indices += indices[:padding_size]
+        assert len(indices) == self.total_size
+
+        # subsample per rank
+        indices = indices[self.rank_id : self.total_size : self.num_shards]
+        assert len(indices) == self.num_samples
+
+        # return up to num selected samples
+        return iter(indices[: self.num_selected_samples])
+
+    def __len__(self):
+        return self.num_selected_samples
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
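
A usage sketch; in the training pipeline this sampler is normally created for you by passing num_aug_repeats to create_dataset, so direct construction is mainly useful for inspection. The dataset size below is the ImageNet-1K train set and is purely illustrative:

from mindcv.data.distributed_sampler import RepeatAugSampler

# 3 augmented repeats per sample, sharded across 8 devices (rank 0 shown)
sampler = RepeatAugSampler(dataset_size=1281167, num_shards=8, rank_id=0, num_repeats=3)
print(len(sampler))  # number of samples this rank will actually draw per epoch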
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

DataLoader

+ + + +
+ + + +

+mindcv.data.loader.create_loader(dataset, batch_size, drop_remainder=False, is_training=False, mixup=0.0, cutmix=0.0, cutmix_prob=0.0, num_classes=1000, transform=None, target_transform=None, num_parallel_workers=None, python_multiprocessing=False, separate=False) + +

+ + +
+ +

Creates dataloader.

+

Applies operations such as transform and batch to the ms.dataset.Dataset object +created by the create_dataset function to get the dataloader.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset +
+

dataset object created by create_dataset.

+
+

+ + TYPE: + ms.dataset.Dataset + +

+
batch_size +
+

The number of rows each batch is created with. An +int or callable object which takes exactly 1 parameter, BatchInfo.

+
+

+ + TYPE: + int or function + +

+
drop_remainder +
+

Determines whether to drop the last block +whose data row number is less than batch size (default=False). If True, and if there are less +than batch_size rows available to make the last batch, then those rows will +be dropped and not propagated to the child node.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
is_training +
+

whether it is in train mode. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
mixup +
+

mixup alpha, mixup will be enabled if > 0. (default=0.0).

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix +
+

cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_prob +
+

prob of doing cutmix for an image (default=0.0)

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
num_classes +
+

the number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
transform +
+

the list of transformations that wil be applied on the image, +which is obtained by create_transform. If None, the default imagenet transformation +for evaluation will be applied. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
target_transform +
+

the list of transformations that will be applied on the label. +If None, the label will be converted to the type of ms.int32. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers(threads) to process the dataset in parallel +(default=None).

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
python_multiprocessing +
+

Parallelize Python operations with multiple worker processes. This +option could be beneficial if the Python operation is computational heavy (default=False).

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate(bool, +
+

separate the image origin and the image been transformed

+
+

+ + TYPE: + optional + +

+
+ +
+ Note +
    +
  1. cutmix is now experimental (which means performance gain is not guarantee) + and can not be used together with mixup due to the label int type conflict.
  2. +
  3. is_training, mixup, num_classes is used for MixUp, which is a kind of transform operation. + However, we are not able to merge it into transform, due to the limitations of the mindspore.dataset API.
  4. +
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

BatchDataset, dataset batched.

+
+
+ +
+ Source code in mindcv/data/loader.py +
 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
def create_loader(
+    dataset,
+    batch_size,
+    drop_remainder=False,
+    is_training=False,
+    mixup=0.0,
+    cutmix=0.0,
+    cutmix_prob=0.0,
+    num_classes=1000,
+    transform=None,
+    target_transform=None,
+    num_parallel_workers=None,
+    python_multiprocessing=False,
+    separate=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (ms.dataset.Dataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        is_training (bool): whether it is in train mode. Default: False.
+        mixup (float): mixup alpha, mixup will be enabled if > 0. (default=0.0).
+        cutmix (float): cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.
+        cutmix_prob (float): prob of doing cutmix for an image (default=0.0)
+        num_classes (int): the number of classes. Default: 1000.
+        transform (list or None): the list of transformations that wil be applied on the image,
+            which is obtained by `create_transform`. If None, the default imagenet transformation
+            for evaluation will be applied. Default: None.
+        target_transform (list or None): the list of transformations that will be applied on the label.
+            If None, the label will be converted to the type of ms.int32. Default: None.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
+        separate(bool, optional): separate the image origin and the image been transformed
+
+    Note:
+        1. cutmix is now experimental (which means performance gain is not guarantee)
+            and can not be used together with mixup due to the label int type conflict.
+        2. `is_training`, `mixup`, `num_classes` is used for MixUp, which is a kind of transform operation.
+          However, we are not able to merge it into `transform`, due to the limitations of the `mindspore.dataset` API.
+
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+
+    if target_transform is None:
+        target_transform = transforms.TypeCast(ms.int32)
+    target_input_columns = "label" if "label" in dataset.get_col_names() else "fine_label"
+    dataset = dataset.map(
+        operations=target_transform,
+        input_columns=target_input_columns,
+        num_parallel_workers=num_parallel_workers,
+        python_multiprocessing=python_multiprocessing,
+    )
+
+    if transform is None:
+        warnings.warn(
+            "Using None as the default value of transform will set it back to "
+            "traditional image transform, which is not recommended. "
+            "You should explicitly call `create_transforms` and pass it to `create_loader`."
+        )
+        transform = create_transforms("imagenet", is_training=False)
+
+    # only apply augment splits to train dataset
+    if separate and is_training:
+        assert isinstance(transform, tuple) and len(transform) == 3
+
+        # Note: mindspore 2.0 removed the `column_order` parameter from `dataset.map`
+        sig = inspect.signature(dataset.map)
+        pass_column_order = "kwargs" not in sig.parameters
+
+        # map all the transform
+        dataset = map_transform_splits(
+            dataset, transform, num_parallel_workers, python_multiprocessing, pass_column_order
+        )
+        # after batching, the dataset has 4 columns
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+        # concat the 3 columns of image
+        dataset = dataset.map(
+            operations=concat_per_batch_map,
+            input_columns=["image_clean", "image_aug1", "image_aug2", "label"],
+            output_columns=["image", "label"],
+            column_order=["image", "label"] if pass_column_order else None,
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+    else:
+        dataset = dataset.map(
+            operations=transform,
+            input_columns="image",
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+
+    if is_training:
+        if (mixup + cutmix > 0.0) and batch_size > 1:
+            # TODO: use mindspore vision cutmix and mixup after the conflict is fixed in a later release
+            # set label_smoothing 0 here since label smoothing is computed in loss module
+            mixup_fn = Mixup(
+                mixup_alpha=mixup,
+                cutmix_alpha=cutmix,
+                cutmix_minmax=None,
+                prob=cutmix_prob,
+                switch_prob=0.5,
+                label_smoothing=0.0,
+                num_classes=num_classes,
+            )
+            # images in a batch are mixed; labels are converted to soft one-hot labels.
+            dataset = dataset.map(
+                operations=mixup_fn,
+                input_columns=["image", target_input_columns],
+                num_parallel_workers=num_parallel_workers,
+            )
+
+    return dataset
+
+
+
+ +
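+
+ Example (usage sketch): a minimal, hedged illustration of how `create_loader` is typically combined with `create_dataset` and `create_transforms`. The dataset root path and hyper-parameter values below are placeholders, not part of the documented API, and the argument names of `create_dataset` may differ slightly across MindCV versions.
+
+# Hedged usage sketch -- paths and hyper-parameters are illustrative placeholders.
+from mindcv.data import create_dataset
+from mindcv.data.transforms_factory import create_transforms
+from mindcv.data.loader import create_loader
+
+dataset = create_dataset(name="imagenet", root="./data/imagenet", split="train", shuffle=True)
+transform = create_transforms(dataset_name="imagenet", image_resize=224, is_training=True)
+loader = create_loader(
+    dataset,
+    batch_size=64,
+    is_training=True,
+    mixup=0.2,              # > 0 enables MixUp on each batch
+    num_classes=1000,
+    transform=transform,
+    drop_remainder=True,
+)
+for images, labels in loader.create_tuple_iterator():
+    break                   # one batch: images (64, 3, 224, 224), soft labels (64, 1000)
+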

MixUp

+ + +
+ + + +

+ mindcv.data.mixup.Mixup + + +

+ + +
+ + +

Mixup/Cutmix that applies different params to each element or whole batch

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
mixup_alpha +
+

mixup alpha value, mixup is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
cutmix_alpha +
+

cutmix alpha value, cutmix is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_minmax +
+

cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.

+
+

+ + TYPE: + List[float] + + + DEFAULT: + None + +

+
prob +
+

probability of applying mixup or cutmix per batch or element

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
switch_prob +
+

probability of switching to cutmix instead of mixup when both are active

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
mode +
+

how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), or 'elem' (element))

+
+

+ + TYPE: + str + + + DEFAULT: + 'batch' + +

+
correct_lam +
+

apply lambda correction when cutmix bbox clipped by image borders

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
label_smoothing +
+

apply label smoothing to the mixed target tensor

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
num_classes +
+

number of classes for target

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/data/mixup.py +
+ (lines 96-250)
class Mixup:
+    """Mixup/Cutmix that applies different params to each element or whole batch
+
+    Args:
+        mixup_alpha (float): mixup alpha value, mixup is active if > 0.
+        cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0.
+        cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.
+        prob (float): probability of applying mixup or cutmix per batch or element
+        switch_prob (float): probability of switching to cutmix instead of mixup when both are active
+        mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), or 'elem' (element))
+        correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders
+        label_smoothing (float): apply label smoothing to the mixed target tensor
+        num_classes (int): number of classes for target
+    """
+
+    def __init__(
+        self,
+        mixup_alpha=1.0,
+        cutmix_alpha=0.0,
+        cutmix_minmax=None,
+        prob=1.0,
+        switch_prob=0.5,
+        mode="batch",
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000,
+    ):
+        self.mixup_alpha = mixup_alpha
+        self.cutmix_alpha = cutmix_alpha
+        self.cutmix_minmax = cutmix_minmax
+        if self.cutmix_minmax is not None:
+            assert len(self.cutmix_minmax) == 2
+            # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe
+            self.cutmix_alpha = 1.0
+        self.mix_prob = prob
+        self.switch_prob = switch_prob
+        self.label_smoothing = label_smoothing
+        self.num_classes = num_classes
+        self.mode = mode
+        self.correct_lam = correct_lam  # correct lambda based on clipped area for cutmix
+        self.mixup_enabled = True  # set False to disable mixing (intended to be set by the train loop)
+
+    def _params_per_elem(self, batch_size):
+        """_params_per_elem"""
+        lam = np.ones(batch_size, dtype=np.float32)
+        use_cutmix = np.zeros(batch_size, dtype=bool)  # np.bool alias is removed in NumPy >= 1.24
+        if self.mixup_enabled:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand(batch_size) < self.switch_prob
+                lam_mix = np.where(
+                    use_cutmix,
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size),
+                    np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size),
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = np.ones(batch_size, dtype=bool)
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam)
+        return lam, use_cutmix
+
+    def _params_per_batch(self):
+        """_params_per_batch"""
+        lam = 1.0
+        use_cutmix = False
+        if self.mixup_enabled and np.random.rand() < self.mix_prob:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand() < self.switch_prob
+                lam_mix = (
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+                    if use_cutmix
+                    else np.random.beta(self.mixup_alpha, self.mixup_alpha)
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = True
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = float(lam_mix)
+        return lam, use_cutmix
+
+    def _mix_elem(self, x):
+        """_mix_elem"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_pair(self, x):
+        """_mix_pair"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size // 2)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size // 2):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+                    x[j] = x[j] * lam + x_orig[i] * (1 - lam)
+        lam_batch = np.concatenate((lam_batch, lam_batch[::-1]))
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_batch(self, x):
+        """_mix_batch"""
+        lam, use_cutmix = self._params_per_batch()
+        if lam == 1.0:
+            return 1.0
+        if use_cutmix:
+            (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+            )
+            x[:, :, yl:yh, xl:xh] = np.flip(x, axis=0)[:, :, yl:yh, xl:xh]
+        else:
+            x_flipped = np.flip(x, axis=0) * (1.0 - lam)
+            x *= lam
+            x += x_flipped
+        return lam
+
+    def __call__(self, x, target):
+        """Mixup apply"""
+        # the same to image, label
+        assert len(x) % 2 == 0, "Batch size should be even when using this"
+        if self.mode == "elem":
+            lam = self._mix_elem(x)
+        elif self.mode == "pair":
+            lam = self._mix_pair(x)
+        else:
+            lam = self._mix_batch(x)
+        target = mixup_target(target, self.num_classes, lam, self.label_smoothing)
+        return x.astype(np.float32), target.astype(np.float32)
+
+
+ + + +
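+
+ Example (usage sketch): `Mixup` is normally mapped over batched numpy columns inside `create_loader` (see above); the sketch below calls it directly on a random batch to show the shapes involved. All values here are illustrative only.
+
+import numpy as np
+from mindcv.data.mixup import Mixup
+
+# A random batch stands in for real data; the batch size must be even.
+mixup_fn = Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0, switch_prob=0.5,
+                 label_smoothing=0.0, num_classes=10)
+images = np.random.rand(8, 3, 224, 224).astype(np.float32)        # NCHW batch
+labels = np.random.randint(0, 10, size=(8,)).astype(np.int32)     # sparse class indices
+mixed_images, soft_labels = mixup_fn(images, labels)              # soft_labels has shape (8, 10)
+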
+ + + + + + + + + + + +
+ +
+ +

Transform Factory

+ + + +
+ + + +

+mindcv.data.transforms_factory.create_transforms(dataset_name='', image_resize=224, is_training=False, auto_augment=None, separate=False, **kwargs) + +

+ + +
+ +

Creates a list of transform operation on image data.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_name +
+

if '', a customized dataset is assumed and the same transform pipeline as ImageNet is applied. +If a standard dataset name is given (imagenet, cifar10, mnist), the preset transforms for that dataset will be returned. +Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
image_resize +
+

the image size after resize for adapting to network. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
is_training +
+

if True, augmentation will be applied if supported. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate +
+

whether to separate the original image and the transformed image.

+
+

+ + DEFAULT: + False + +

+
**kwargs +
+

additional args passed to transforms_imagenet_train and transforms_imagenet_eval

+
+

+ + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A list of transformation operations

+
+
+ +
+ Source code in mindcv/data/transforms_factory.py +
+ (lines 182-223)
def create_transforms(
+    dataset_name="",
+    image_resize=224,
+    is_training=False,
+    auto_augment=None,
+    separate=False,
+    **kwargs,
+):
+    r"""Creates a list of transform operation on image data.
+
+    Args:
+        dataset_name (str): if '', a customized dataset is assumed and the same transform pipeline as ImageNet is applied.
+            If a standard dataset name is given (imagenet, cifar10, mnist), the preset transforms for that dataset will be returned.
+            Default: ''.
+        image_resize (int): the image size after resizing for adapting to the network. Default: 224.
+        is_training (bool): if True, augmentation will be applied if supported. Default: False.
+        auto_augment (str): augmentation strategy, such as "augmix", "autoaug", etc.
+        separate (bool): whether to separate the original image and the transformed image.
+        **kwargs: additional args passed to `transforms_imagenet_train` and `transforms_imagenet_eval`
+
+    Returns:
+        A list of transformation operations
+    """
+
+    dataset_name = dataset_name.lower()
+
+    if dataset_name in ("imagenet", ""):
+        trans_args = dict(image_resize=image_resize, **kwargs)
+        if is_training:
+            return transforms_imagenet_train(auto_augment=auto_augment, separate=separate, **trans_args)
+
+        return transforms_imagenet_eval(**trans_args)
+    elif dataset_name in ("cifar10", "cifar100"):
+        trans_list = transforms_cifar(resize=image_resize, is_training=is_training)
+        return trans_list
+    elif dataset_name == "mnist":
+        trans_list = transforms_mnist(resize=image_resize)
+        return trans_list
+    else:
+        raise NotImplementedError(
+            f"Only supports creating transforms for ['imagenet', 'cifar10', 'cifar100', 'mnist'] datasets, but got {dataset_name}."
+        )
+
+
+
+ +
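+
+ Example (usage sketch): typical calls to `create_transforms` for training, evaluation, and a CIFAR-style dataset. The auto-augment policy string follows the examples given in the docstring above; the image sizes are illustrative.
+
+from mindcv.data.transforms_factory import create_transforms
+
+# ImageNet-style training pipeline with an auto-augment policy
+train_trans = create_transforms(dataset_name="imagenet", image_resize=224,
+                                is_training=True, auto_augment="autoaug")
+# ImageNet-style evaluation pipeline
+eval_trans = create_transforms(dataset_name="imagenet", image_resize=224, is_training=False)
+# Preset CIFAR-10 pipeline
+cifar_trans = create_transforms(dataset_name="cifar10", image_resize=32, is_training=True)
+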
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+
\ No newline at end of file
diff --git a/en/reference/loss/index.html b/en/reference/loss/index.html
new file mode 100644
index 000000000..0de0f5e22
--- /dev/null
+++ b/en/reference/loss/index.html
@@ -0,0 +1,1994 @@
+ loss - MindCV Docs
+ [page head, navigation, and theme script boilerplate omitted]
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Loss

+

Loss Factory

+ + + +
+ + + +

+mindcv.loss.loss_factory.create_loss(name='CE', weight=None, reduction='mean', label_smoothing=0.0, aux_factor=0.0) + +

+ + +
+ +

Creates loss function

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

loss name: 'CE' for cross entropy, 'BCE' for binary cross entropy. Default: 'CE'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'CE' + +

+
weight +
+

Class weight. A rescaling weight given to the loss of each batch element. +If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
reduction +
+

Apply a specific reduction method to the output: 'mean' or 'sum'. +'mean': the sum of the output will be divided by the number of elements in the output. +'sum': the output will be summed. Default: 'mean'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'mean' + +

+
label_smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
+ +
+ Inputs +
    +
  • logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples, + C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits) + for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
  • +
  • labels (Tensor): Ground truth labels. Shape: [N] or [N, C]. + (1) If in shape [N], sparse labels representing the class indices. Must be int type. + (2) shape [N, C], dense labels representing the ground truth class probability values, + or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
  • +
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Loss function to compute the loss between the input logits and labels.

+
+
+ +
+ Source code in mindcv/loss/loss_factory.py +
+ (lines 14-64)
def create_loss(
+    name: str = "CE",
+    weight: Optional[Tensor] = None,
+    reduction: str = "mean",
+    label_smoothing: float = 0.0,
+    aux_factor: float = 0.0,
+):
+    r"""Creates loss function
+
+    Args:
+        name (str): loss name: 'CE' for cross entropy, 'BCE' for binary cross entropy. Default: 'CE'.
+        weight (Tensor): Class weight. A rescaling weight given to the loss of each batch element.
+            If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.
+        reduction: Apply a specific reduction method to the output: 'mean' or 'sum'.
+            'mean': the sum of the output will be divided by the number of elements in the output.
+            'sum': the output will be summed. Default: 'mean'.
+        label_smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor (float): Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3. Default: 0.0.
+
+    Inputs:
+        - logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples,
+            C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits)
+            for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
+        - labels (Tensor): Ground truth labels. Shape: [N] or [N, C].
+            (1) If in shape [N], sparse labels representing the class indices. Must be int type.
+            (2) shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
+
+    Returns:
+       Loss function to compute the loss between the input logits and labels.
+    """
+    name = name.lower()
+
+    if name == "ce":
+        loss = CrossEntropySmooth(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    elif name == "bce":
+        loss = BinaryCrossEntropySmooth(
+            smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight, pos_weight=None
+        )
+    elif name == "asl_single_label":
+        loss = AsymmetricLossSingleLabel(smoothing=label_smoothing)
+    elif name == "asl_multi_label":
+        loss = AsymmetricLossMultilabel()
+    elif name == "jsd":
+        loss = JSDCrossEntropy(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    else:
+        raise NotImplementedError(f"'{name}' loss is not supported.")
+
+    return loss
+
+
+
+ +
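+
+ Example (usage sketch): building a loss with `create_loss` and applying it to dummy logits and labels; the tensor shapes simply follow the Inputs description above, and the numeric values are placeholders.
+
+import numpy as np
+import mindspore as ms
+from mindcv.loss.loss_factory import create_loss
+
+criterion = create_loss(name="CE", label_smoothing=0.1)                      # cross entropy + label smoothing
+aux_criterion = create_loss(name="CE", label_smoothing=0.1, aux_factor=0.1)  # supports auxiliary heads
+
+logits = ms.Tensor(np.random.randn(4, 1000), ms.float32)
+labels = ms.Tensor(np.random.randint(0, 1000, (4,)), ms.int32)               # sparse labels of shape [N]
+loss = criterion(logits, labels)                                             # scalar Tensor
+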

Cross Entropy

+ + +
+ + + +

+ mindcv.loss.cross_entropy_smooth.CrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Cross entropy loss with label smoothing. +Applies the softmax activation function to the input logits and uses them to compute the cross entropy +between the logits and the labels.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element. +Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes. + Tuple composed of multiple logits are supported in order (main_logits, aux_logits) + for auxiliary loss used in networks like inception_v3. +labels (Tensor): Ground truth label. Shape: [N] or [N, C]. + (1) Shape (N), sparse labels representing the class indices. Must be int type. + (2) Shape [N, C], dense labels representing the ground truth class probability values, + or the one-hot labels. Must be float type.

+
+
+ Source code in mindcv/loss/cross_entropy_smooth.py +
+ (lines 6-55)
class CrossEntropySmooth(nn.LossBase):
+    """
+    Cross entropy loss with label smoothing.
+    Applies the softmax activation function to the input `logits` and uses them to compute the cross entropy
+    between the logits and the labels.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element.
+            Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes.
+            A tuple composed of multiple logits is supported, in the order (main_logits, aux_logits),
+            for auxiliary loss used in networks like inception_v3.
+        labels (Tensor): Ground truth label. Shape: [N] or [N, C].
+            (1) Shape (N), sparse labels representing the class indices. Must be int type.
+            (2) Shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+            for aux in logits[1:]:
+                if self.aux_factor > 0:
+                    loss_aux += F.cross_entropy(
+                        aux, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+                    )
+        else:
+            main_logits = logits
+
+        loss_logits = F.cross_entropy(
+            main_logits, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+        )
+        loss = loss_logits + self.aux_factor * loss_aux
+        return loss
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
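+
+ Example (usage sketch): direct use of `CrossEntropySmooth`, including the tuple form for an auxiliary head; the shapes follow the Inputs section above and the values are placeholders.
+
+import numpy as np
+import mindspore as ms
+from mindcv.loss.cross_entropy_smooth import CrossEntropySmooth
+
+criterion = CrossEntropySmooth(smoothing=0.1, aux_factor=0.1)
+logits = ms.Tensor(np.random.randn(4, 10), ms.float32)
+aux_logits = ms.Tensor(np.random.randn(4, 10), ms.float32)
+labels = ms.Tensor(np.array([1, 0, 3, 2]), ms.int32)
+
+loss = criterion(logits, labels)                          # main logits only
+loss_with_aux = criterion((logits, aux_logits), labels)   # auxiliary loss weighted by aux_factor
+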

Binary Cross Entropy

+ + +
+ + + +

+ mindcv.loss.binary_cross_entropy_smooth.BinaryCrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Binary cross entropy loss with label smoothing. +Applies the sigmoid activation function to the input logits and uses them to compute the binary cross entropy +between the logits and the labels.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. A rescaling weight applied to the loss of each batch element. Shape [C]. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
pos_weight +
+

Positive weight for each class. A weight of positive examples. Shape [C]. +Must be a vector with length equal to the number of classes. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes. + Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss. +labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as logits or (2) shape [N]. + can be a class probability matrix or one-hot labels. Data type must be float16 or float32.

+
+
+ Source code in mindcv/loss/binary_cross_entropy_smooth.py +
+ (lines 7-87)
class BinaryCrossEntropySmooth(nn.LossBase):
+    """
+    Binary cross entropy loss with label smoothing.
+    Applies the sigmoid activation function to the input `logits` and uses them to compute the binary cross entropy
+    between the logits and the labels.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. A rescaling weight applied to the loss of each batch element. Shape [C].
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+        pos_weight (Tensor): Positive weight for each class. A weight of positive examples. Shape [C].
+            Must be a vector with length equal to the number of classes.
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes.
+            Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss.
+        labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as `logits` or (2) shape [N].
+            can be a class probability matrix or one-hot labels. Data type must be float16 or float32.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None, pos_weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+        self.pos_weight = pos_weight
+        self.ones = P.OnesLike()
+        self.one_hot = P.OneHot()
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+        aux_logits = None
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+            aux_logits = logits[1:]  # keep the auxiliary logits so the auxiliary loss below is actually computed
+        else:
+            main_logits = logits
+
+        if main_logits.size != labels.size:
+            # We must explicitly convert the label to one-hot,
+            # because binary_cross_entropy_with_logits requires the input and label to have the same shape.
+            class_dim = 0 if main_logits.ndim == 1 else 1
+            n_classes = main_logits.shape[class_dim]
+            labels = self.one_hot(labels, n_classes, Tensor(1.0), Tensor(0.0))
+
+        ones_input = self.ones(main_logits)
+        if self.weight is not None:
+            weight = self.weight
+        else:
+            weight = ones_input
+        if self.pos_weight is not None:
+            pos_weight = self.pos_weight
+        else:
+            pos_weight = ones_input
+
+        if self.smoothing > 0.0:
+            class_dim = 0 if main_logits.ndim == 1 else -1
+            n_classes = main_logits.shape[class_dim]
+            labels = labels * (1 - self.smoothing) + self.smoothing / n_classes
+
+        if self.aux_factor > 0 and aux_logits is not None:
+            for aux in aux_logits:
+                loss_aux += F.binary_cross_entropy_with_logits(
+                    aux, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+                )
+        # else:
+        #    warnings.warn("There are logit tuple input, but the auxiliary loss factor is 0.")
+
+        loss_logits = F.binary_cross_entropy_with_logits(
+            main_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+        )
+
+        loss = loss_logits + self.aux_factor * loss_aux
+
+        return loss
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
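+
+ Example (usage sketch): `BinaryCrossEntropySmooth` with sparse labels, which the code above converts to one-hot internally; the values are placeholders.
+
+import numpy as np
+import mindspore as ms
+from mindcv.loss.binary_cross_entropy_smooth import BinaryCrossEntropySmooth
+
+criterion = BinaryCrossEntropySmooth(smoothing=0.1)
+logits = ms.Tensor(np.random.randn(4, 5), ms.float32)
+labels = ms.Tensor(np.array([0, 2, 1, 4]), ms.int32)   # shape [N]; converted to one-hot internally
+loss = criterion(logits, labels)
+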
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+
\ No newline at end of file
diff --git a/en/reference/models.layers/index.html b/en/reference/models.layers/index.html
new file mode 100644
index 000000000..88cfccff2
--- /dev/null
+++ b/en/reference/models.layers/index.html
@@ -0,0 +1,2976 @@
+ models.layers - MindCV Docs
+ [page head, navigation, and theme script boilerplate omitted]
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Common Layers in Model

+

Activation

+ + +
+ + + +

+ mindcv.models.layers.activation.Swish + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Swish activation function: x * sigmoid(x).

+ +
+ Return +

Tensor

+
+
+ Example +
+
+
+

x = Tensor(((20, 16), (50, 50)), mindspore.float32) +Swish()(x)

+
+
+
+
+
+ Source code in mindcv/models/layers/activation.py +
+ (lines 10-32)
class Swish(nn.Cell):
+    """
+    Swish activation function: x * sigmoid(x).
+
+    Args:
+        None
+
+    Return:
+        Tensor
+
+    Example:
+        >>> x = Tensor(((20, 16), (50, 50)), mindspore.float32)
+        >>> Swish()(x)
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.result = None
+        self.sigmoid = nn.Sigmoid()
+
+    def construct(self, x):
+        result = x * self.sigmoid(x)
+        return result
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

DropPath

+ + +
+ + + +

+ mindcv.models.layers.drop_path.DropPath + + +

+ + +
+

+ Bases: nn.Cell

+ + +

DropPath (Stochastic Depth) regularization layers

+ +
+ Source code in mindcv/models/layers/drop_path.py +
+ (lines 12-32)
class DropPath(nn.Cell):
+    """DropPath (Stochastic Depth) regularization layers"""
+
+    def __init__(
+        self,
+        drop_prob: float = 0.0,
+        scale_by_keep: bool = True,
+    ) -> None:
+        super().__init__()
+        self.keep_prob = 1.0 - drop_prob
+        self.scale_by_keep = scale_by_keep
+        self.dropout = Dropout(p=drop_prob)
+
+    def construct(self, x: Tensor) -> Tensor:
+        if self.keep_prob == 1.0 or not self.training:
+            return x
+        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+        random_tensor = self.dropout(ones(shape))
+        if not self.scale_by_keep:
+            random_tensor = ops.mul(random_tensor, self.keep_prob)
+        return x * random_tensor
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
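+
+ Example (usage sketch): stochastic depth is only active in training mode; each sample's residual-branch output is either dropped entirely or kept and rescaled by 1/keep_prob (when scale_by_keep=True). The shapes and drop probability are illustrative.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.layers.drop_path import DropPath
+
+drop_path = DropPath(drop_prob=0.2)
+drop_path.set_train(True)                            # identity (no-op) in eval mode
+x = ms.Tensor(np.ones((4, 8, 16, 16)), ms.float32)
+y = drop_path(x)                                     # per sample: either all zeros or x / 0.8
+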

Identity

+ + +
+ + + +

+ mindcv.models.layers.identity.Identity + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Identity

+ +
+ Source code in mindcv/models/layers/identity.py +
+ (lines 5-9)
class Identity(nn.Cell):
+    """Identity"""
+
+    def construct(self, x):
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

MLP

+ + +
+ + + +

+ mindcv.models.layers.mlp.Mlp + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/layers/mlp.py +
+ (lines 10-33)
class Mlp(nn.Cell):
+    def __init__(
+        self,
+        in_features: int,
+        hidden_features: Optional[int] = None,
+        out_features: Optional[int] = None,
+        act_layer: Optional[nn.Cell] = nn.GELU,
+        drop: float = 0.0,
+    ) -> None:
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Dense(in_channels=in_features, out_channels=hidden_features, has_bias=True)
+        self.act = act_layer()
+        self.fc2 = nn.Dense(in_channels=hidden_features, out_channels=out_features, has_bias=True)
+        self.drop = Dropout(p=drop)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
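+
+ Example (usage sketch): the `Mlp` block operates on the last dimension, so it can be applied to token sequences as in transformer-style models; the shapes here are illustrative only.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.layers.mlp import Mlp
+
+mlp = Mlp(in_features=96, hidden_features=384, drop=0.1)   # out_features defaults to in_features
+tokens = ms.Tensor(np.random.randn(2, 196, 96), ms.float32)
+out = mlp(tokens)                                           # shape (2, 196, 96)
+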

Patch Embedding

+ + +
+ + + +

+ mindcv.models.layers.patch_embed.PatchEmbed + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Image to Patch Embedding

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Image size. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
patch_size +
+

Patch token size. Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
in_chans +
+

Number of input image channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
embed_dim +
+

Number of linear projection output channels. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
norm_layer +
+

Normalization layer. Default: None

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/layers/patch_embed.py +
+ (lines 10-60)
class PatchEmbed(nn.Cell):
+    """Image to Patch Embedding
+
+    Args:
+        image_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Cell, optional): Normalization layer. Default: None
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        embed_dim: int = 96,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        image_size = to_2tuple(image_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [image_size[0] // patch_size[0], image_size[1] // patch_size[1]]
+        self.image_size = image_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+
+        self.proj = nn.Conv2d(in_channels=in_chans, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size,
+                              pad_mode='pad', has_bias=True, weight_init="TruncatedNormal")
+
+        if norm_layer is not None:
+            if isinstance(embed_dim, int):
+                embed_dim = (embed_dim,)
+            self.norm = norm_layer(embed_dim, epsilon=1e-5)
+        else:
+            self.norm = None
+
+    def construct(self, x: Tensor) -> Tensor:
+        """Split the image into non-overlapping patches and project them to the embedding dimension."""
+        B = x.shape[0]
+        # FIXME look at relaxing size constraints
+        x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+        x = ops.Transpose()(x, (0, 2, 1))
+
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.layers.patch_embed.PatchEmbed.construct(x) + +

+ + +
+ +

Split the image into non-overlapping patches and project them to the embedding dimension.

+ +
+ Source code in mindcv/models/layers/patch_embed.py +
+ (lines 51-60)
def construct(self, x: Tensor) -> Tensor:
+    """Split the image into non-overlapping patches and project them to the embedding dimension."""
+    B = x.shape[0]
+    # FIXME look at relaxing size constraints
+    x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+    x = ops.Transpose()(x, (0, 2, 1))
+
+    if self.norm is not None:
+        x = self.norm(x)
+    return x
+
+
+
+ +
+ + + +
+ +
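+
+ Example (usage sketch): with the defaults above, a 224x224 image split into 4x4 patches yields (224 // 4) ** 2 = 3136 tokens of dimension 96; the batch size is illustrative.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.layers.patch_embed import PatchEmbed
+
+patch_embed = PatchEmbed(image_size=224, patch_size=4, in_chans=3, embed_dim=96)
+images = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
+tokens = patch_embed(images)               # shape (2, 3136, 96)
+print(patch_embed.num_patches)             # 3136
+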
+ +

Pooling

+ + +
+ + + +

+ mindcv.models.layers.pooling.GlobalAvgPooling + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1

+ +
+ Source code in mindcv/models/layers/pooling.py +
+ (lines 5-16)
class GlobalAvgPooling(nn.Cell):
+    """
+    GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1
+    """
+
+    def __init__(self, keep_dims: bool = False) -> None:
+        super().__init__()
+        self.keep_dims = keep_dims
+
+    def construct(self, x):
+        x = ops.mean(x, axis=(2, 3), keep_dims=self.keep_dims)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Selective Kernel

+ + +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernelAttn + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Attention Module +Selective Kernel attention mechanism factored out into its own module.

+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
+ (lines 24-55)
class SelectiveKernelAttn(nn.Cell):
+    """Selective Kernel Attention Module
+    Selective Kernel attention mechanism factored out into its own module.
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        num_paths: int = 2,
+        attn_channels: int = 32,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        self.num_paths = num_paths
+        self.mean = GlobalAvgPooling(keep_dims=True)
+        self.fc_reduce = nn.Conv2d(channels, attn_channels, kernel_size=1, has_bias=False)
+        self.bn = norm(attn_channels)
+        self.act = activation()
+        self.fc_select = nn.Conv2d(attn_channels, channels * num_paths, kernel_size=1)
+        self.softmax = nn.Softmax(axis=1)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.mean((x.sum(1)))
+        x = self.fc_reduce(x)
+        x = self.bn(x)
+        x = self.act(x)
+        x = self.fc_select(x)
+        b, c, h, w = x.shape
+        x = x.reshape((b, self.num_paths, c // self.num_paths, h, w))
+        x = self.softmax(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernel + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Convolution Module +As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications. +Largest change is the input split, which divides the input channels across each convolution path; this can +be viewed as a grouping of sorts, but the output channel counts expand to the module-level value. This keeps +the parameter count from ballooning when the convolutions themselves don't have groups, but still provides +a noteworthy increase in performance over similar param count models without this attention layer. -Ross W

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

module input (feature) channel count

+
+

+ + TYPE: + int + +

+
out_channels +
+

module output (feature) channel count

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
kernel_size +
+

kernel size for each convolution branch

+
+

+ + TYPE: + (int, list) + + + DEFAULT: + None + +

+
stride +
+

stride for convolutions

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
dilation +
+

dilation for module as a whole, impacts dilation of each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
groups +
+

number of groups for each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
rd_ratio +
+

reduction factor for attention features

+
+

+ + TYPE: + (int, float) + + + DEFAULT: + 1.0 / 16 + +

+
rd_channels(int) +
+

reduction channels can be specified directly by arg (if rd_channels is set)

+
+

+

+
rd_divisor(int) +
+

divisor can be specified to keep the reduction channels divisible (channels % div == 0, default: 8)

+
+

+

+
keep_3x3 +
+

keep all branch convolution kernels as 3x3, changing larger kernels for dilations

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
split_input +
+

split input channels evenly across each convolution branch, keeps param count lower, +can be viewed as grouping by path, output expands to module out_channels count

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
activation +
+

activation layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.ReLU + +

+
norm +
+

batchnorm/norm layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.BatchNorm2d + +

+
+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
+ (lines 58-142)
class SelectiveKernel(nn.Cell):
+    """Selective Kernel Convolution Module
+    As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications.
+    Largest change is the input split, which divides the input channels across each convolution path; this can
+    be viewed as a grouping of sorts, but the output channel counts expand to the module-level value. This keeps
+    the parameter count from ballooning when the convolutions themselves don't have groups, but still provides
+    a noteworthy increase in performance over similar param count models without this attention layer. -Ross W
+    Args:
+        in_channels (int):  module input (feature) channel count
+        out_channels (int):  module output (feature) channel count
+        kernel_size (int, list): kernel size for each convolution branch
+        stride (int): stride for convolutions
+        dilation (int): dilation for module as a whole, impacts dilation of each branch
+        groups (int): number of groups for each branch
+        rd_ratio (int, float): reduction factor for attention features
+        rd_channels(int): reduction channels can be specified directly by arg (if rd_channels is set)
+        rd_divisor(int): divisor can be specified to keep the reduction channels divisible (channels % div == 0)
+        keep_3x3 (bool): keep all branch convolution kernels as 3x3, changing larger kernels for dilations
+        split_input (bool): split input channels evenly across each convolution branch, keeps param count lower,
+            can be viewed as grouping by path, output expands to module out_channels count
+        activation (nn.Module): activation layer to use
+        norm (nn.Module): batchnorm/norm layer to use
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: Optional[int] = None,
+        kernel_size: Optional[Union[int, List]] = None,
+        stride: int = 1,
+        dilation: int = 1,
+        groups: int = 1,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        keep_3x3: bool = True,
+        split_input: bool = True,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        out_channels = out_channels or in_channels
+        kernel_size = kernel_size or [3, 5]  # default to one 3x3 and one 5x5 branch. 5x5 -> 3x3 + dilation
+        _kernel_valid(kernel_size)
+        if not isinstance(kernel_size, list):
+            kernel_size = [kernel_size] * 2
+        if keep_3x3:
+            dilation = [dilation * (k - 1) // 2 for k in kernel_size]
+            kernel_size = [3] * len(kernel_size)
+        else:
+            dilation = [dilation] * len(kernel_size)
+        self.num_paths = len(kernel_size)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.split_input = split_input
+        if self.split_input:
+            assert in_channels % self.num_paths == 0
+            in_channels = in_channels // self.num_paths
+        groups = min(out_channels, groups)
+        self.split = Split(split_size_or_sections=self.in_channels // self.num_paths, output_num=self.num_paths, axis=1)
+
+        self.paths = nn.CellList([
+            Conv2dNormActivation(in_channels, out_channels, kernel_size=k, stride=stride, groups=groups,
+                                 dilation=d, activation=activation, norm=norm)
+            for k, d in zip(kernel_size, dilation)
+        ])
+
+        attn_channels = rd_channels or make_divisible(out_channels * rd_ratio, divisor=rd_divisor)
+        self.attn = SelectiveKernelAttn(out_channels, self.num_paths, attn_channels)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_paths = []
+        if self.split_input:
+            x_split = self.split(x)
+            for i, op in enumerate(self.paths):
+                x_paths.append(op(x_split[i]))
+        else:
+            for op in self.paths:
+                x_paths.append(op(x))
+
+        x = ops.stack(x_paths, axis=1)
+        x_attn = self.attn(x)
+        x = x * x_attn
+        x = x.sum(1)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
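+
+ Example (usage sketch): a two-branch selective-kernel convolution; with split_input=True (the default) the input channel count must be divisible by the number of paths. The channel counts and spatial size below are illustrative.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.layers.selective_kernel import SelectiveKernel
+
+sk = SelectiveKernel(in_channels=64, out_channels=128)    # default branches: 3x3 and dilated 3x3
+x = ms.Tensor(np.random.randn(2, 64, 32, 32), ms.float32)
+y = sk(x)                                                 # shape (2, 128, 32, 32)
+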
+ +

Squeeze and Excite

+ + +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExcite + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in original SE-Nets with a few additions.

+ +
+ Additions include +
    +
  • divisor can be specified to keep channels % div == 0 (default: 8)
  • +
  • reduction channels can be specified directly by arg (if rd_channels is set)
  • +
  • reduction channels can be specified by float rd_ratio (default: 1/16)
  • +
  • customizable activation, normalization, and gate layer
  • +
+
+
+ Source code in mindcv/models/layers/squeeze_excite.py +
+ (lines 14-65)
class SqueezeExcite(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    Additions include:
+        * divisor can be specified to keep channels % div == 0 (default: 8)
+        * reduction channels can be specified directly by arg (if rd_channels is set)
+        * reduction channels can be specified by float rd_ratio (default: 1/16)
+        * customizable activation, normalization, and gate layer
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Conv2d(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=True)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x = x * x_se
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
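+
+ Example (usage sketch): channel-wise re-weighting with `SqueezeExcite`; the reduction width follows rd_ratio and rd_divisor as described above, and the input shape is illustrative.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.layers.squeeze_excite import SqueezeExcite
+
+se = SqueezeExcite(in_channels=64, rd_ratio=1.0 / 16)     # bottleneck width rounded up by rd_divisor=8
+x = ms.Tensor(np.random.randn(2, 64, 32, 32), ms.float32)
+y = se(x)                                                 # same shape as x, channels re-weighted
+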
+ +
+ +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExciteV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in original SE-Nets with a few additions. +V1 uses 1x1 convolutions in place of fully-connected layers, while V2 implements them directly with nn.Dense.

+ +
+ Source code in mindcv/models/layers/squeeze_excite.py +
+ (lines 68-115)
class SqueezeExciteV2(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    V1 uses 1x1 convolutions in place of fully-connected layers, while V2 implements them directly with nn.Dense.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Dense(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Dense(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=False)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x_se = ops.expand_dims(x_se, -1)
+        x_se = ops.expand_dims(x_se, -1)
+        x = x * x_se
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+
\ No newline at end of file
diff --git a/en/reference/models/index.html b/en/reference/models/index.html
new file mode 100644
index 000000000..77d217276
--- /dev/null
+++ b/en/reference/models/index.html
@@ -0,0 +1,40359 @@
+ models - MindCV Docs
+ [page head, navigation, and theme script boilerplate omitted]
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Models

+

Create Model

+ + + +
+ + + +

+mindcv.models.model_factory.create_model(model_name, num_classes=1000, pretrained=False, in_channels=3, checkpoint_path='', ema=False, auto_mapping=False, **kwargs) + +

+ + +
+ +

Creates model by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
model_name +
+

The name of model.

+
+

+ + TYPE: + str + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
pretrained +
+

Whether to load the pretrained model. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
checkpoint_path +
+

The path of checkpoint files. Default: "".

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
ema +
+

Whether to load the EMA (exponential moving average) weights from the checkpoint. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
auto_mapping +
+

Whether to automatically map the names of checkpoint weights +to the names of model weights when there are differences in names. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/model_factory.py +
+ (lines 7-45)
def create_model(
+    model_name: str,
+    num_classes: int = 1000,
+    pretrained: bool = False,
+    in_channels: int = 3,
+    checkpoint_path: str = "",
+    ema: bool = False,
+    auto_mapping: bool = False,
+    **kwargs,
+):
+    r"""Creates model by name.
+
+    Args:
+        model_name (str):  The name of model.
+        num_classes (int): The number of classes. Default: 1000.
+        pretrained (bool): Whether to load the pretrained model. Default: False.
+        in_channels (int): The input channels. Default: 3.
+        checkpoint_path (str): The path of checkpoint files. Default: "".
+        ema (bool): Whether to use the EMA (exponential moving average) weights when loading the checkpoint. Default: False.
+        auto_mapping (bool): Whether to automatically map the names of checkpoint weights
+            to the names of model weights when there are differences in names. Default: False.
+    """
+
+    if checkpoint_path != "" and pretrained:
+        raise ValueError("checkpoint_path is mutually exclusive with pretrained")
+
+    model_args = dict(num_classes=num_classes, pretrained=pretrained, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        load_model_checkpoint(model, checkpoint_path, ema, auto_mapping)
+
+    return model
+
+
+
+ +
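A minimal usage sketch of create_model (assuming mindcv and MindSpore are installed; "resnet50" is an illustrative registry name, and any model name registered on this page works the same way):

import numpy as np
import mindspore as ms

from mindcv.models.model_factory import create_model

# build a model by its registry name; note that pretrained and checkpoint_path are mutually exclusive
model = create_model("resnet50", num_classes=10, pretrained=False)
model.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)
print(logits.shape)  # expected: (1, 10)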

bit

+ + +
+ + + +

+ mindcv.models.bit.BiT_ResNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

BiT_ResNet model class, based on +"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>_

PARAMETERS:
block (Union[Bottleneck]): block of BiT_ResNetv2.
layers (tuple(int)): number of layers of each stage.
wf (int): width factor applied to the channels of each layer. Default: 1.
num_classes (int): number of classification classes. Default: 1000.
in_channels (int): number of input channels. Default: 3.
groups (int): number of groups for group conv in blocks. Default: 1.
base_width (int): base width of per-group hidden channels in blocks. Default: 64.
norm (nn.Cell): normalization layer in blocks. Default: None.
+ +
+ Source code in mindcv/models/bit.py +
class BiT_ResNet(nn.Cell):
+    r"""BiT_ResNet model class, based on
+    `"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>`_
+    Args:
+        block(Union[Bottleneck]): block of BiT_ResNetv2.
+        layers(tuple(int)): number of layers of each stage.
+        wf(int): width factor applied to the channels of each layer. Default: 1.
+        num_classes(int): number of classification classes. Default: 1000.
+        in_channels(int): number of input channels. Default: 3.
+        groups(int): number of groups for group conv in blocks. Default: 1.
+        base_width(int): base width of per-group hidden channels in blocks. Default: 64.
+        norm(nn.Cell): normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[Bottleneck]],
+        layers: List[int],
+        wf: int = 1,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+
+        if norm is None:
+            norm = nn.GroupNorm
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64 * wf
+        self.groups = groups
+        self.base_width = base_width
+
+        self.conv1 = StdConv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.pad = nn.ConstantPad2d(1, 0)
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
+
+        self.layer1 = self._make_layer(block, 64 * wf, layers[0])
+        self.layer2 = self._make_layer(block, 128 * wf, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256 * wf, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512 * wf, layers[3], stride=2)
+
+        self.gn = norm(32, 2048 * wf)
+        self.relu = nn.ReLU()
+        self.pool = GlobalAvgPooling(keep_dims=True)
+        self.classifier = nn.Conv2d(512 * block.expansion * wf, num_classes, kernel_size=1, has_bias=True)
+
+    def _make_layer(
+        self,
+        block: Type[Union[Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                StdConv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_width,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def root(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.pad(x)
+        x = self.max_pool(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.gn(x)
+        x = self.relu(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.root(x)
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        assert x.shape[-2:] == (1, 1)  # We should have no spatial shape left.
+        return x[..., 0, 0]
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.bit.BiT_ResNet.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/bit.py +
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 101-layer BiT ResNet model. Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 101-layer BiT ResNet model.
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet101"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model. Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 50-layer BiT ResNet model.
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet50"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50x3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model with 3x width (wf=3). Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet50x3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 50-layer BiT ResNet model with 3x width (wf=3).
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet50x3"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], wf=3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
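A short sketch of building one of the BiT variants above from its factory function (assuming mindcv and MindSpore are installed; the 224x224 input size is an illustrative choice):

import numpy as np
import mindspore as ms

from mindcv.models.bit import BiT_resnet50

model = BiT_resnet50(num_classes=5)
model.set_train(False)

x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
logits = model(x)  # head: GroupNorm -> ReLU -> global average pooling -> 1x1 conv classifier
print(logits.shape)  # expected: (2, 5)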

cait

+ + +
+ + + +

+ mindcv.models.cait.CaiT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cait.py +
class CaiT(nn.Cell):
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_channels: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: float = 4.,
+                 qkv_bias: bool = False,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = nn.LayerNorm,
+                 act_layer: nn.Cell = nn.GELU,
+                 init_values: float = 1e-4,
+                 depth_token_only: int = 2,
+                 mlp_ratio_clstk: float = 4.0) -> None:
+        super(CaiT, self).__init__()
+        self.num_classes = num_classes
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(image_size=img_size,
+                                      patch_size=patch_size,
+                                      in_chans=in_channels,
+                                      embed_dim=embed_dim)
+
+        num_patches = self.patch_embed.num_patches
+
+        zeros = ops.Zeros()
+        self.cls_token = Parameter(zeros((1, 1, embed_dim), ms.float32))
+        self.pos_embed = Parameter(zeros((1, num_patches, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+
+        self.blocks = []
+        self.blocks_token_only = []
+
+        self.blocks = nn.CellList([
+            LayerScaleBlockSA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=drop_rate,
+                attn_drop_rate=attn_drop_rate,
+                drop_path_rate=dpr[i],
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth)])
+        self.blocks_token_only = nn.CellList([
+            LayerScaleBlockCA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=0.0,
+                attn_drop_rate=0.0,
+                drop_path_rate=0.0,
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth_token_only)])
+
+        self.norm = norm_layer((embed_dim,))
+
+        self.head = nn.Dense(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+        self.pos_embed = init.initializer(TruncatedNormal(sigma=0.02), self.pos_embed.shape, ms.float32)
+        self.cls_token = init.initializer(TruncatedNormal(sigma=0.02), self.cls_token.shape, ms.float32)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = init.initializer(TruncatedNormal(sigma=0.02), m.weight.shape, ms.float32)
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        B = x.shape[0]
+        x = self.patch_embed(x)
+
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+
+        for i , blk in enumerate(self.blocks):
+            x = blk(x)
+        for i , blk in enumerate(self.blocks_token_only):
+            cls_tokens = blk(x, cls_tokens)
+
+        x = ops.concat((cls_tokens, x), axis=1)
+
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_m36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=36, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m48_448(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_m48_448(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=448, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=48, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=36, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xs24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_xs24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=288, depth=24, num_heads=6, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xxs24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_xxs24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=192, depth=24, num_heads=4, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
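A sketch of using one of the CaiT entrypoints above (assuming mindcv and MindSpore are installed). Note that each variant name encodes the expected input resolution (e.g. cait_xxs24_224 expects 224x224, cait_s24_384 expects 384x384), because the positional embedding is sized at construction:

import numpy as np
import mindspore as ms

from mindcv.models.cait import cait_xxs24_224

model = cait_xxs24_224(num_classes=10)
model.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 10)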

cmt

+ + +
+ + + +

+ mindcv.models.cmt.CMT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cmt.py +
class CMT(nn.Cell):
+    def __init__(
+        self,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        embed_dims=None,
+        stem_channel=16,
+        fc_dim=1280,
+        num_heads=None,
+        mlp_ratios=None,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=None,
+        depths=None,
+        qk_ratio=1,
+        sr_ratios=None,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dims[-1]
+        norm_layer = norm_layer or nn.LayerNorm
+
+        self.stem_conv1 = nn.Conv2d(
+            in_channels, stem_channel, kernel_size=3, stride=2, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu1 = nn.GELU()
+        self.stem_norm1 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv2 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu2 = nn.GELU()
+        self.stem_norm2 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv3 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu3 = nn.GELU()
+        self.stem_norm3 = nn.BatchNorm2d(stem_channel)
+
+        self.patch_embed_a = PatchEmbed(
+            img_size=img_size // 2, patch_size=2, in_chans=stem_channel, embed_dim=embed_dims[0])
+        self.patch_embed_b = PatchEmbed(
+            img_size=img_size // 4, patch_size=2, in_chans=embed_dims[0], embed_dim=embed_dims[1])
+        self.patch_embed_c = PatchEmbed(
+            img_size=img_size // 8, patch_size=2, in_chans=embed_dims[1], embed_dim=embed_dims[2])
+        self.patch_embed_d = PatchEmbed(
+            img_size=img_size // 16, patch_size=2, in_chans=embed_dims[2], embed_dim=embed_dims[3])
+
+        self.relative_pos_a = ops.zeros(
+            (num_heads[0], self.patch_embed_a.num_patches,
+             self.patch_embed_a.num_patches // sr_ratios[0] // sr_ratios[0]),
+            mindspore.float32)
+        self.relative_pos_b = ops.zeros(
+            (num_heads[1], self.patch_embed_b.num_patches,
+             self.patch_embed_b.num_patches // sr_ratios[1] // sr_ratios[1]),
+            mindspore.float32)
+        self.relative_pos_c = ops.zeros(
+            (num_heads[2], self.patch_embed_c.num_patches,
+             self.patch_embed_c.num_patches // sr_ratios[2] // sr_ratios[2]),
+            mindspore.float32)
+        self.relative_pos_d = ops.zeros(
+            (num_heads[3], self.patch_embed_d.num_patches,
+             self.patch_embed_d.num_patches // sr_ratios[3] // sr_ratios[3]),
+            mindspore.float32)
+
+        # stochastic depth decay rule
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+        self.blocks_a = nn.CellList([
+            Block(
+                dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        cur += depths[0]
+        self.blocks_b = nn.CellList([
+            Block(
+                dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        cur += depths[1]
+        self.blocks_c = nn.CellList([
+            Block(
+                dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        cur += depths[2]
+        self.blocks_d = nn.CellList([
+            Block(
+                dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+
+        # Classifier head
+        self._fc = nn.Conv2d(
+            embed_dims[-1], fc_dim, kernel_size=1, has_bias=True)
+        self._bn = nn.BatchNorm2d(fc_dim)
+        self._drop = Dropout(p=drop_rate)
+        self.head = nn.Dense(
+            fc_dim, num_classes) if num_classes > 0 else ops.Identity()
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape,
+                                                      cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            elif isinstance(cell, (nn.LayerNorm, nn.BatchNorm2d)):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+
+    def forward_features(self, x):
+        B = x.shape[0]
+        x = self.stem_conv1(x)
+        x = self.stem_relu1(x)
+        x = self.stem_norm1(x)
+
+        x = self.stem_conv2(x)
+        x = self.stem_relu2(x)
+        x = self.stem_norm2(x)
+
+        x = self.stem_conv3(x)
+        x = self.stem_relu3(x)
+        x = self.stem_norm3(x)
+
+        x, (H, W) = self.patch_embed_a(x)
+        for _, blk in enumerate(self.blocks_a):
+            x = blk(x, H, W, self.relative_pos_a)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_b(x)
+        for _, blk in enumerate(self.blocks_b):
+            x = blk(x, H, W, self.relative_pos_b)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_c(x)
+        for _, blk in enumerate(self.blocks_c):
+            x = blk(x, H, W, self.relative_pos_c)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_d(x)
+        for _, blk in enumerate(self.blocks_d):
+            x = blk(x, H, W, self.relative_pos_d)
+
+        B, _, C = x.shape
+
+        x = self._fc(ops.transpose(x, (0, 2, 1)).reshape(B, C, H, W))
+        x = self._bn(x)
+        x = swish(x)
+        x = GlobalAvgPooling()(x)
+        x = self._drop(x)
+        return x
+
+    def forward_head(self, x):
+        x = self.head(x)
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-Base

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_base(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Base
+    """
+    default_cfg = default_cfgs["cmt_base"]
+
+    model = CMT(img_size=256, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[76, 152, 304, 608], stem_channel=38, num_heads=[1, 2, 4, 8], depths=[4, 4, 20, 4],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-Small

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_small(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Small
+    """
+    default_cfg = default_cfgs["cmt_small"]
+
+    model = CMT(img_size=224, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[64, 128, 256, 512], stem_channel=32, num_heads=[1, 2, 4, 8], depths=[3, 3, 16, 3],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-tiny

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_tiny(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-tiny
+    """
+    default_cfg = default_cfgs["cmt_tiny"]
+
+    model = CMT(img_size=160, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[46, 92, 184, 368], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[2, 2, 10, 2],
+                mlp_ratios=[3.6, 3.6, 3.6, 3.6], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_xsmall(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-XSmall

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_xsmall(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-XSmall
+    """
+    default_cfg = default_cfgs["cmt_xsmall"]
+
+    model = CMT(img_size=192, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[52, 104, 208, 416], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[3, 3, 12, 3],
+                mlp_ratios=[3.8, 3.8, 3.8, 3.8], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
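A sketch of building a CMT variant (assuming mindcv and MindSpore are installed). Each entrypoint above fixes its own input resolution through img_size (160 for cmt_tiny, 192 for cmt_xsmall, 224 for cmt_small, 256 for cmt_base), so the dummy input below matches cmt_tiny:

import numpy as np
import mindspore as ms

from mindcv.models.cmt import cmt_tiny

model = cmt_tiny(num_classes=10)
model.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 160, 160), ms.float32)
print(model(x).shape)  # expected: (1, 10)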

coat

+ + + +
+ + + +

+mindcv.models.coat.coat_lite_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_medium(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_medium']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[128, 256, 320, 512],
+                 serial_depths=[3, 6, 10, 8], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[3, 4, 6, 3], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_tiny']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 256, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 216, 216, 216],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 320, 320, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 152, 152, 152],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
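A sketch for the CoaT entrypoints above (assuming mindcv and MindSpore are installed). The lite variants use parallel_depth=0, i.e. serial blocks only, while coat_tiny/coat_mini/coat_small add a parallel branch; the 224x224 input size below is an assumption based on the library's usual default:

import numpy as np
import mindspore as ms

from mindcv.models.coat import coat_lite_tiny

model = coat_lite_tiny(num_classes=10)
model.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 10)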

convit

+ + +
+ + + +

+ mindcv.models.convit.ConViT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ConViT model class, based on +'"Improving Vision Transformers with Soft Convolutional Inductive Biases" +https://arxiv.org/pdf/2103.10697.pdf'

PARAMETERS:
in_channels (int): number of input channels. Default: 3.
num_classes (int): number of classification classes. Default: 1000.
image_size (int): input image size. Default: 224.
patch_size (int): image patch size. Default: 16.
embed_dim (int): embedding dimension shared by all heads. Default: 48.
num_heads (int): number of attention heads. Default: 12.
drop_rate (float): dropout rate. Default: 0.0.
drop_path_rate (float): drop path rate. Default: 0.1.
depth (int): number of blocks in the model. Default: 12.
mlp_ratio (float): ratio of hidden features in the MLP. Default: 4.0.
qkv_bias (bool): whether the qkv projections have a bias. Default: False.
attn_drop_rate (float): dropout rate of the attention layers. Default: 0.0.
locality_strength (float): determines how focused each head is around its attention center. Default: 1.0.
local_up_to_layer (int): number of GPSA layers. Default: 10.
use_pos_embed (bool): whether to use position embedding. Default: True.
+ +
+ Source code in mindcv/models/convit.py +
class ConViT(nn.Cell):
+    r"""ConViT model class, based on
+    '"Improving Vision Transformers with Soft Convolutional Inductive Biases"
+    <https://arxiv.org/pdf/2103.10697.pdf>'
+
+    Args:
+        in_channels (int): number of input channels. Default: 3.
+        num_classes (int): number of classification classes. Default: 1000.
+        image_size (int): input image size. Default: 224.
+        patch_size (int): image patch size. Default: 16.
+        embed_dim (int): embedding dimension shared by all heads. Default: 48.
+        num_heads (int): number of attention heads. Default: 12.
+        drop_rate (float): dropout rate. Default: 0.
+        drop_path_rate (float): drop path rate. Default: 0.1.
+        depth (int): number of blocks in the model. Default: 12.
+        mlp_ratio (float): ratio of hidden features in the MLP. Default: 4.
+        qkv_bias (bool): whether the qkv projections have a bias. Default: False.
+        attn_drop_rate (float): dropout rate of the attention layers. Default: 0.
+        locality_strength (float): determines how focused each head is around its attention center. Default: 1.
+        local_up_to_layer (int): number of GPSA layers. Default: 10.
+        use_pos_embed (bool): whether to use position embedding. Default: True.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        image_size: int = 224,
+        patch_size: int = 16,
+        embed_dim: int = 48,
+        num_heads: int = 12,
+        drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        depth: int = 12,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        attn_drop_rate: float = 0.0,
+        local_up_to_layer: int = 10,
+        use_pos_embed: bool = True,
+        locality_strength: float = 1.0,
+    ) -> None:
+        super().__init__()
+
+        self.local_up_to_layer = local_up_to_layer
+        self.use_pos_embed = use_pos_embed
+        self.num_heads = num_heads
+        self.locality_strength = locality_strength
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim)
+        self.num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(ops.Zeros()((1, 1, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        if self.use_pos_embed:
+            self.pos_embed = Parameter(ops.Zeros()((1, self.num_patches, embed_dim), ms.float32))
+            self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.pos_embed.data.shape))
+
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, depth)]
+        self.blocks = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=True)
+            if i < local_up_to_layer else
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=False)
+            for i in range(depth)])
+        self.norm = nn.LayerNorm((embed_dim,))
+
+        self.classifier = nn.Dense(in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else Identity()
+        self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.cls_token.data.shape))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.data.shape))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Constant(0), cell.bias.shape))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.Constant(1), cell.gamma.shape))
+                cell.beta.set_data(init.initializer(init.Constant(0), cell.beta.shape))
+        # local init
+        for i in range(self.local_up_to_layer):
+            self.blocks[i].attn.v.weight.set_data(ops.eye(self.embed_dim, self.embed_dim, ms.float32), slice_shape=True)
+            locality_distance = 1
+            kernel_size = int(self.num_heads**0.5)
+            center = (kernel_size - 1) / 2 if kernel_size % 2 == 0 else kernel_size // 2
+            pos_weight_data = self.blocks[i].attn.pos_proj.weight.data
+            for h1 in range(kernel_size):
+                for h2 in range(kernel_size):
+                    position = h1 + kernel_size * h2
+                    pos_weight_data[position, 2] = -1
+                    pos_weight_data[position, 1] = 2 * (h1 - center) * locality_distance
+                    pos_weight_data[position, 0] = 2 * (h2 - center) * locality_distance
+            pos_weight_data = pos_weight_data * self.locality_strength
+            self.blocks[i].attn.pos_proj.weight.set_data(pos_weight_data)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.use_pos_embed:
+            x = x + self.pos_embed
+        x = self.pos_drop(x)
+        cls_tokens = ops.tile(self.cls_token, (x.shape[0], 1, 1))
+        for u, blk in enumerate(self.blocks):
+            if u == self.local_up_to_layer:
+                x = ops.Cast()(x, cls_tokens.dtype)
+                x = ops.concat((cls_tokens, x), 1)
+            x = blk(x)
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT base model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=768, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_base_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT base+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_base_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=1024, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT small model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=432, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_small_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT small+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_small_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=576, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT tiny model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=192, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_tiny_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT tiny+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_tiny_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=256, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
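A sketch for the ConViT entrypoints above (assuming mindcv and MindSpore are installed). The first local_up_to_layer blocks use gated positional self-attention (GPSA) and the class token is only appended afterwards, but from the caller's point of view it behaves like any other image classifier:

import numpy as np
import mindspore as ms

from mindcv.models.convit import convit_tiny

model = convit_tiny(num_classes=10)
model.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 10)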

convnext

+ + +
+ + + +

+ mindcv.models.convnext.ConvNeXt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ConvNeXt and ConvNeXt V2 model class, based on +"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545>_ and +"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>_

PARAMETERS:
in_channels (int): number of input channels.
num_classes (int): number of classes to predict.
depths (List[int]): number of blocks in each stage.
dims (List[int]): feature dimension of each stage.
drop_path_rate (float): the drop path rate. Default: 0.0.
layer_scale_init_value (float): initial value of the per-block layer scale. Default: 1e-6.
head_init_scale (float): scaling applied to the classifier weights and bias at initialization. Default: 1.0.
use_grn (bool): if True, use Global Response Normalization in each block. Default: False.
+ +
+ Source code in mindcv/models/convnext.py +
class ConvNeXt(nn.Cell):
+    r"""ConvNeXt and ConvNeXt V2 model class, based on
+    `"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545>`_ and
+    `"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>`_
+
+    Args:
+        in_channels: number of input channels.
+        num_classes: number of classes to predict.
+        depths: number of blocks in each stage.
+        dims: feature dimension of each stage.
+        drop_path_rate: the drop path rate. Default: 0.0.
+        layer_scale_init_value: initial value of the per-block layer scale. Default: 1e-6.
+        head_init_scale: scaling applied to the classifier weights and bias at initialization. Default: 1.0.
+        use_grn: If True, use Global Response Normalization in each block. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        num_classes: int,
+        depths: List[int],
+        dims: List[int],
+        drop_path_rate: float = 0.0,
+        layer_scale_init_value: float = 1e-6,
+        head_init_scale: float = 1.0,
+        use_grn: bool = False,
+    ):
+        super().__init__()
+
+        downsample_layers = []  # stem and 3 intermediate down_sampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4, has_bias=True),
+            ConvNextLayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                ConvNextLayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            downsample_layers.append(downsample_layer)
+
+        total_reduction = 4
+        self.feature_info = []
+        self.flatten_sequential = True
+
+        stages = []  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            blocks = []
+            for j in range(depths[i]):
+                blocks.append(Block(dim=dims[i], drop_path=dp_rates[cur + j],
+                                    layer_scale_init_value=layer_scale_init_value, use_grn=use_grn))
+            stage = nn.SequentialCell(blocks)
+            stages.append(stage)
+            cur += depths[i]
+
+            if i > 0:
+                total_reduction *= 2
+            self.feature_info.append(dict(chs=dims[i], reduction=total_reduction, name=f'feature.{i * 2 + 1}'))
+
+        self.feature = nn.SequentialCell([
+            downsample_layers[0],
+            stages[0],
+            downsample_layers[1],
+            stages[1],
+            downsample_layers[2],
+            stages[2],
+            downsample_layers[3],
+            stages[3]
+        ])
+        self.norm = ConvNextLayerNorm((dims[-1],), epsilon=1e-6)  # final norm layer
+        self.classifier = nn.Dense(dims[-1], num_classes)  # classifier
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+        self.classifier.weight.set_data(self.classifier.weight * self.head_init_scale)
+        self.classifier.bias.set_data(self.classifier.bias * self.head_init_scale)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.feature(x)
+        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
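The drop_path_rate above is not applied uniformly: as the constructor shows, it is spread linearly over all blocks via np.linspace. A minimal standalone sketch of that schedule (the depths and rate below are illustrative values, not library defaults):

import numpy as np

depths = [3, 3, 9, 3]      # e.g. the convnext_tiny stage depths
drop_path_rate = 0.1       # illustrative value
dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))

cur = 0
for stage, depth in enumerate(depths):
    # each stage consumes the next `depth` rates, so later blocks drop more often
    print(f"stage {stage}:", [round(r, 3) for r in dp_rates[cur:cur + depth]])
    cur += depth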
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt base model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 287-296
@register_model
+def convnext_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_base"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnext_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt large model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 299-308
@register_model
+def convnext_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_large"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt small model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 275-284
@register_model
+def convnext_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt small model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_small"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnext_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt tiny model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 263-272
@register_model
+def convnext_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_tiny"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
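A minimal usage sketch for the factory above, assuming MindSpore and MindCV are installed and that convnext_tiny is re-exported by mindcv.models:

import numpy as np
import mindspore as ms
from mindcv.models import convnext_tiny  # assumes the factory is re-exported here

model = convnext_tiny(pretrained=False, num_classes=1000, in_channels=3)
dummy = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(dummy)   # runs ConvNeXt.construct
print(logits.shape)     # expected: (1, 1000)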
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnext_xlarge(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt xlarge model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 311-320
@register_model
+def convnext_xlarge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt xlarge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_xlarge"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_atto(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 atto model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 323-331
@register_model
+def convnextv2_atto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 atto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_atto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[40, 80, 160, 320], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 base model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 378-386
@register_model
+def convnextv2_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_base"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[128, 256, 512, 1024], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_femto(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 femto model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 334-342
@register_model
+def convnextv2_femto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 femto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_femto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[48, 96, 192, 384], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_huge(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 huge model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 400-408
@register_model
+def convnextv2_huge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 huge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_huge"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[352, 704, 1408, 2816], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 large model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 389-397
@register_model
+def convnextv2_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_large"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[192, 384, 768, 1536], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_nano(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 nano model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 356-364
@register_model
+def convnextv2_nano(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 nano model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_nano"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 8, 2],
+                      dims=[80, 160, 320, 640], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_pico(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 pico model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 345-353
@register_model
+def convnextv2_pico(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 pico model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_pico"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[64, 128, 256, 512], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convnext.convnextv2_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConvNeXt_v2 tiny model. +Refer to the base class 'models.ConvNeXt' for more details.

+ +
+ Source code in mindcv/models/convnext.py, lines 367-375
@register_model
+def convnextv2_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_tiny"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3],
+                      dims=[96, 192, 384, 768], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
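Because every factory above is decorated with @register_model, the same variants can also be built by name through the model registry. A short sketch, assuming MindCV exposes create_model at the package level:

import mindcv  # assumes mindcv.create_model is available

# num_classes/in_channels are forwarded to the registered factory
net = mindcv.create_model("convnextv2_tiny", pretrained=False, num_classes=10, in_channels=3)
print(type(net).__name__)  # ConvNeXt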
+ +

crossvit

+ + + +
+ + + +

+mindcv.models.crossvit.crossvit_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/crossvit.py, lines 466-475
@register_model
+def crossvit_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[192, 384], depth=[[1, 5, 0], [1, 5, 0], [1, 5, 0]],
+                              num_heads=[6, 6], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_15"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.crossvit.crossvit_18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/crossvit.py, lines 478-487
@register_model
+def crossvit_18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[224, 448], depth=[[1, 6, 0], [1, 6, 0], [1, 6, 0]],
+                              num_heads=[7, 7], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_18"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.crossvit.crossvit_9(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/crossvit.py, lines 454-463
@register_model
+def crossvit_9(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[128, 256], depth=[[1, 3, 0], [1, 3, 0], [1, 3, 0]],
+                              num_heads=[4, 4], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_9"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
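The three CrossViT factories above differ only in their embedding dims, depths and head counts. A quick sketch that compares their trainable parameter counts (assumes the factories are re-exported by mindcv.models):

from mindcv.models import crossvit_9, crossvit_15, crossvit_18

for factory in (crossvit_9, crossvit_15, crossvit_18):
    net = factory(pretrained=False)
    n_params = sum(p.size for p in net.trainable_params())  # total trainable elements
    print(factory.__name__, f"~{n_params / 1e6:.1f}M trainable parameters")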
+ +

densenet

+ + +
+ + + +

+ mindcv.models.densenet.DenseNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Densenet-BC model class, based on +"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
growth_rate +
+

how many filters to add each layer (k in paper). Default: 32.

+
+

+ + TYPE: + int + + + DEFAULT: + 32 + +

+
block_config +
+

how many layers in each pooling block. Default: (6, 12, 24, 16).

+
+

+ + TYPE: + Tuple[int, int, int, int] + + + DEFAULT: + (6, 12, 24, 16) + +

+
num_init_features +
+

number of filters in the first Conv2d. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
bn_size +
+

multiplicative factor for number of bottleneck layers +(i.e. bn_size * k features in the bottleneck layer). Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
drop_rate +
+

dropout rate after each dense layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/models/densenet.py, lines 126-222
class DenseNet(nn.Cell):
+    r"""Densenet-BC model class, based on
+    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
+
+    Args:
+        growth_rate: how many filters to add each layer (`k` in paper). Default: 32.
+        block_config: how many layers in each pooling block. Default: (6, 12, 24, 16).
+        num_init_features: number of filters in the first Conv2d. Default: 64.
+        bn_size (int): multiplicative factor for number of bottleneck layers
+          (i.e. bn_size * k features in the bottleneck layer). Default: 4.
+        drop_rate: dropout rate after each dense layer. Default: 0.
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        growth_rate: int = 32,
+        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
+        num_init_features: int = 64,
+        bn_size: int = 4,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        layers = OrderedDict()
+        # first Conv2d
+        num_features = num_init_features
+        layers["conv0"] = nn.Conv2d(in_channels, num_features, kernel_size=7, stride=2, pad_mode="pad", padding=3)
+        layers["norm0"] = nn.BatchNorm2d(num_features)
+        layers["relu0"] = nn.ReLU()
+        layers["pool0"] = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        ])
+
+        # DenseBlock
+        for i, num_layers in enumerate(block_config):
+            block = _DenseBlock(
+                num_layers=num_layers,
+                num_input_features=num_features,
+                bn_size=bn_size,
+                growth_rate=growth_rate,
+                drop_rate=drop_rate,
+            )
+            layers[f"denseblock{i + 1}"] = block
+            num_features += num_layers * growth_rate
+            if i != len(block_config) - 1:
+                transition = _Transition(num_features, num_features // 2)
+                layers[f"transition{i + 1}"] = transition
+                num_features = num_features // 2
+
+        # final bn+ReLU
+        layers["norm5"] = nn.BatchNorm2d(num_features)
+        layers["relu5"] = nn.ReLU()
+
+        self.num_features = num_features
+        self.features = nn.SequentialCell(layers)
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
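The constructor above tracks a running channel count: every dense block adds num_layers * growth_rate channels and every transition layer halves them. A worked sketch with the densenet121 configuration:

growth_rate = 32
block_config = (6, 12, 24, 16)   # densenet121
num_features = 64                # num_init_features

for i, num_layers in enumerate(block_config):
    num_features += num_layers * growth_rate   # dense block
    if i != len(block_config) - 1:
        num_features //= 2                     # transition layer
print(num_features)  # 1024 -> input width of the final nn.Dense classifier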
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.densenet.densenet121(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 121 layers DenseNet model. +Refer to the base class models.DenseNet for more details.

+ +
+ Source code in mindcv/models/densenet.py, lines 225-236
@register_model
+def densenet121(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 121 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet121"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
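A minimal usage sketch for the factory above (assumes MindCV is installed and re-exports densenet121; pretrained=True would additionally load the released checkpoint):

from mindcv.models import densenet121

net = densenet121(pretrained=False, num_classes=5, in_channels=3)  # e.g. a 5-class finetuning head
print(net.classifier)  # nn.Dense mapping 1024 features to 5 classes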
+
+ +
+ + +
+ + + +

+mindcv.models.densenet.densenet161(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 161 layers DenseNet model. +Refer to the base class models.DenseNet for more details.

+ +
+ Source code in mindcv/models/densenet.py, lines 239-250
@register_model
+def densenet161(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 161 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet161"]
+    model = DenseNet(growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.densenet.densenet169(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 169 layers DenseNet model. +Refer to the base class models.DenseNet for more details.

+ +
+ Source code in mindcv/models/densenet.py, lines 253-264
@register_model
+def densenet169(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 169 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet169"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 32, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.densenet.densenet201(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 201 layers DenseNet model. +Refer to the base class models.DenseNet for more details.

+ +
+ Source code in mindcv/models/densenet.py, lines 267-278
@register_model
+def densenet201(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 201 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet201"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

dpn

+ + +
+ + + +

+ mindcv.models.dpn.DPN + + +

+ + +
+

+ Bases: nn.Cell

+ + +

DPN model class, based on +"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_init_channel +
+

int type, the number of output channels of the stem convolution. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
k_r +
+

int type, the reference bottleneck width, scaled with the block width in each stage. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
g +
+

int type, number of groups in the grouped convolution. Default: 32.

+
+

+ + TYPE: + int + + + DEFAULT: + 32 + +

+
k_sec +
+

the number of blocks in each of the four stages. Default: (3, 4, 20, 3).

+
+

+ + TYPE: + Tuple[int] + + + DEFAULT: + (3, 4, 20, 3) + +

+
inc_sec +
+

the channel increment of the dense path in each stage. Default: (16, 32, 24, 128).

+
+

+ + TYPE: + Tuple[int] + + + DEFAULT: + (16, 32, 24, 128) + +

+
in_channels +
+

int type, number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

int type, number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/models/dpn.py, lines 140-259
class DPN(nn.Cell):
+    r"""DPN model class, based on
+    `"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>`_
+
+    Args:
+        num_init_channel: int type, the number of output channels of the stem convolution. Default: 64.
+        k_r: int type, the reference bottleneck width, scaled with the block width in each stage. Default: 96.
+        g: int type, number of groups in the grouped convolution. Default: 32.
+        k_sec: Tuple[int], the number of blocks in each of the four stages. Default: (3, 4, 20, 3).
+        inc_sec: Tuple[int], the channel increment of the dense path in each stage. Default: (16, 32, 24, 128).
+        in_channels: int type, number of input channels. Default: 3.
+        num_classes: int type, number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        num_init_channel: int = 64,
+        k_r: int = 96,
+        g: int = 32,
+        k_sec: Tuple[int, int, int, int] = (3, 4, 20, 3),
+        inc_sec: Tuple[int, int, int, int] = (16, 32, 24, 128),
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ):
+        super().__init__()
+        blocks = OrderedDict()
+
+        # conv1
+        blocks["conv1"] = nn.SequentialCell(OrderedDict([
+            ("conv", nn.Conv2d(in_channels, num_init_channel, kernel_size=7, stride=2, pad_mode="pad", padding=3)),
+            ("norm", nn.BatchNorm2d(num_init_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+            ("maxpool", nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")),
+        ]))
+
+        # conv2
+        bw = 256
+        inc = inc_sec[0]
+        r = int((k_r * bw) / 256)
+        blocks["conv2_1"] = DualPathBlock(num_init_channel, r, r, bw, inc, g, "proj", False)
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[0] + 1):
+            blocks[f"conv2_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv3
+        bw = 512
+        inc = inc_sec[1]
+        r = int((k_r * bw) / 256)
+        blocks["conv3_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[1] + 1):
+            blocks[f"conv3_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv4
+        bw = 1024
+        inc = inc_sec[2]
+        r = int((k_r * bw) / 256)
+        blocks["conv4_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[2] + 1):
+            blocks[f"conv4_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv5
+        bw = 2048
+        inc = inc_sec[3]
+        r = int((k_r * bw) / 256)
+        blocks["conv5_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[3] + 1):
+            blocks[f"conv5_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        self.features = nn.SequentialCell(blocks)
+        self.conv5_x = nn.SequentialCell(OrderedDict([
+            ("norm", nn.BatchNorm2d(in_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+        ]))
+        self.avgpool = GlobalAvgPooling()
+        self.classifier = nn.Dense(in_channel, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_feature(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        x = ops.concat(x, axis=1)
+        x = self.conv5_x(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_feature(x)
+        x = self.forward_head(x)
+        return x
+
+
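The dual-path bookkeeping above follows a fixed pattern: each stage starts at bw + 3 * inc channels after its first ("proj"/"down") block, and every further block appends another inc dense-path channels. A worked sketch with the dpn92 defaults:

k_sec = (3, 4, 20, 3)
inc_sec = (16, 32, 24, 128)

in_channel = 0
for stage, (num_blocks, inc) in enumerate(zip(k_sec, inc_sec)):
    bw = 256 * 2 ** stage                  # 256, 512, 1024, 2048
    in_channel = bw + 3 * inc              # after the first block of the stage
    in_channel += (num_blocks - 1) * inc   # each remaining block appends inc channels
print(in_channel)  # 2688 -> input width of the final nn.Dense classifier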
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn107(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 107 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py, lines 304-315
@register_model
+def dpn107(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 107 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn107"]
+    model = DPN(num_init_channel=128, k_r=200, g=50, k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn131(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 131 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py, lines 290-301
@register_model
+def dpn131(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 131 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn131"]
+    model = DPN(num_init_channel=128, k_r=160, g=40, k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn92(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 92 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py, lines 262-273
@register_model
+def dpn92(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 92 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn92"]
+    model = DPN(num_init_channel=64, k_r=96, g=32, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn98(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 98 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py, lines 276-287
@register_model
+def dpn98(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 98 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn98"]
+    model = DPN(num_init_channel=96, k_r=160, g=40, k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

edgenext

+ + +
+ + + +

+ mindcv.models.edgenext.EdgeNeXt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

EdgeNeXt model class, based on +"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of input channels. Default: 3

+
+

+

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + DEFAULT: + 1000 + +

+
depths +
+

the number of blocks in each stage. Default: [3, 3, 9, 3]

+
+

+ + DEFAULT: + [3, 3, 9, 3] + +

+
dims +
+

the feature dimension of each stage. Default: [24, 48, 88, 168]

+
+

+ + DEFAULT: + [24, 48, 88, 168] + +

+
global_block +
+

number of global (SDTA) blocks in each stage. Default: [0, 0, 0, 3]

+
+

+ + DEFAULT: + [0, 0, 0, 3] + +

+
global_block_type +
+

type of global block. Default: ['None', 'None', 'None', 'SDTA']

+
+

+ + DEFAULT: + ['None', 'None', 'None', 'SDTA'] + +

+
drop_path_rate +
+

Stochastic Depth. Default: 0.

+
+

+ + DEFAULT: + 0.0 + +

+
layer_scale_init_value +
+

value of layer scale initialization. Default: 1e-6

+
+

+ + DEFAULT: + 1e-06 + +

+
head_init_scale +
+

scale of head initialization. Default: 1.

+
+

+ + DEFAULT: + 1.0 + +

+
expan_ratio +
+

ratio of expansion. Default: 4

+
+

+ + DEFAULT: + 4 + +

+
kernel_sizes +
+

kernel sizes of different stages. Default: [7, 7, 7, 7]

+
+

+ + DEFAULT: + [7, 7, 7, 7] + +

+
heads +
+

number of attention heads. Default: [8, 8, 8, 8]

+
+

+ + DEFAULT: + [8, 8, 8, 8] + +

+
use_pos_embd_xca +
+

use position embedding in xca or not. Default: [False, False, False, False]

+
+

+ + DEFAULT: + [False, False, False, False] + +

+
use_pos_embd_global +
+

use position embedding globally or not. Default: False

+
+

+ + DEFAULT: + False + +

+
d2_scales +
+

scales of splitting channels

+
+

+ + DEFAULT: + [2, 3, 4, 5] + +

+
+ +
+ Source code in mindcv/models/edgenext.py, lines 296-400
class EdgeNeXt(nn.Cell):
+    r"""EdgeNeXt model class, based on
+    `"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>`_
+
+    Args:
+        in_channels: number of input channels. Default: 3
+        num_classes: number of classification classes. Default: 1000
+        depths: the number of blocks in each stage. Default: [3, 3, 9, 3]
+        dims: the feature dimension of each stage. Default: [24, 48, 88, 168]
+        global_block: number of global (SDTA) blocks in each stage. Default: [0, 0, 0, 3]
+        global_block_type: type of global block. Default: ['None', 'None', 'None', 'SDTA']
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: value of layer scale initialization. Default: 1e-6
+        head_init_scale: scale of head initialization. Default: 1.
+        expan_ratio: ratio of expansion. Default: 4
+        kernel_sizes: kernel sizes of different stages. Default: [7, 7, 7, 7]
+        heads: number of attention heads. Default: [8, 8, 8, 8]
+        use_pos_embd_xca: use position embedding in xca or not. Default: [False, False, False, False]
+        use_pos_embd_global: use position embedding globally or not. Default: False
+        d2_scales: scales of splitting channels
+    """
+    def __init__(self, in_chans=3, num_classes=1000,
+                 depths=[3, 3, 9, 3], dims=[24, 48, 88, 168],
+                 global_block=[0, 0, 0, 3], global_block_type=["None", "None", "None", "SDTA"],
+                 drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1., expan_ratio=4,
+                 kernel_sizes=[7, 7, 7, 7], heads=[8, 8, 8, 8], use_pos_embd_xca=[False, False, False, False],
+                 use_pos_embd_global=False, d2_scales=[2, 3, 4, 5], **kwargs):
+        super().__init__()
+        for g in global_block_type:
+            assert g in ["None", "SDTA"]
+        if use_pos_embd_global:
+            self.pos_embd = PositionalEncodingFourier(dim=dims[0])
+        else:
+            self.pos_embd = None
+        self.downsample_layers = nn.CellList()  # stem and 3 intermediate downsampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4, has_bias=True),
+            LayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        self.downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                LayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            self.downsample_layers.append(downsample_layer)
+
+        self.stages = nn.CellList()  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            stage_blocks = []
+            for j in range(depths[i]):
+                if j > depths[i] - global_block[i] - 1:
+                    if global_block_type[i] == "SDTA":
+                        stage_blocks.append(SDTAEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                        expan_ratio=expan_ratio, scales=d2_scales[i],
+                                                        use_pos_emb=use_pos_embd_xca[i], num_heads=heads[i]))
+                    else:
+                        raise NotImplementedError
+                else:
+                    stage_blocks.append(ConvEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                    layer_scale_init_value=layer_scale_init_value,
+                                                    expan_ratio=expan_ratio, kernel_size=kernel_sizes[i]))
+
+            self.stages.append(nn.SequentialCell(*stage_blocks))
+            cur += depths[i]
+        self.norm = nn.LayerNorm((dims[-1],), epsilon=1e-6)  # Final norm layer
+        self.head = nn.Dense(dims[-1], num_classes)
+
+        # self.head_dropout = Dropout(kwargs["classifier_dropout"])
+        self.head_dropout = Dropout(p=0.0)
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, (nn.LayerNorm)):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+        self.head.weight.set_data(self.head.weight * self.head_init_scale)
+        self.head.bias.set_data(self.head.bias * self.head_init_scale)
+
+    def forward_features(self, x):
+        x = self.downsample_layers[0](x)
+        x = self.stages[0](x)
+        if self.pos_embd is not None:
+            B, C, H, W = x.shape
+            x = x + self.pos_embd(B, H, W)
+        for i in range(1, 4):
+            x = self.downsample_layers[i](x)
+            x = self.stages[i](x)
+        return self.norm(x.mean([-2, -1]))  # Global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(self.head_dropout(x))
+        return x
+
+
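The condition j > depths[i] - global_block[i] - 1 in the constructor above means that only the last global_block[i] blocks of stage i become SDTA encoders; the rest stay convolutional. A small sketch with the configuration used by edgenext_small:

depths = [3, 3, 9, 3]
global_block = [0, 1, 1, 1]

for i in range(4):
    kinds = ["SDTA" if j > depths[i] - global_block[i] - 1 else "Conv"
             for j in range(depths[i])]
    print(f"stage {i}: {kinds}")
# stage 0 is purely convolutional; stages 1-3 each end with one SDTA encoder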
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_base model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py, lines 472-491
@register_model
+def edgenext_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_base model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_base"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[80, 160, 288, 584],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py, lines 450-469
@register_model
+def edgenext_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[48, 96, 160, 304],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_x_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py, lines 427-447
@register_model
+def edgenext_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_x_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_x_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[32, 64, 100, 192],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_xx_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py, lines 403-424
@register_model
+def edgenext_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_xx_small model.
+        Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_xx_small"]
+    model = EdgeNeXt(
+        depths=[2, 2, 6, 2],
+        dims=[24, 48, 88, 168],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=['None', 'SDTA', 'SDTA', 'SDTA'],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
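A minimal forward-pass sketch for the factory above (assumes MindSpore and MindCV are installed and edgenext_xx_small is re-exported by mindcv.models; the spatial size only needs to be divisible by the overall stride of 32):

import numpy as np
import mindspore as ms
from mindcv.models import edgenext_xx_small  # assumes re-export

net = edgenext_xx_small(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(2, 3, 256, 256), ms.float32)
print(net(x).shape)  # expected: (2, 1000)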
+ +

efficientnet

+ + +
+ + + +

+ mindcv.models.efficientnet.EfficientNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

EfficientNet architecture. +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
arch +
+

The name of the model.

+
+

+ + TYPE: + str + +

+
dropout_rate +
+

The dropout rate of efficientnet.

+
+

+ + TYPE: + float + +

+
width_mult +
+

The width multiplier applied to channel counts. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
depth_mult +
+

The depth multiplier applied to the number of layers per stage. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
inverted_residual_setting +
+

The settings of block. +Default: None.

+
+

+ + TYPE: + Sequence[Union[MBConvConfig, FusedMBConvConfig]] + + + DEFAULT: + None + +

+
drop_path_prob +
+

The drop path rate of MBConv. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
norm_layer +
+

The normalization layer. Default: None.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
+ +
+ Inputs
+   - x (Tensor): Tensor of shape (N, C_in, H_in, W_in).
+
+ Outputs
+   Tensor of shape (N, 1000).

+
+
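The drop_path_prob documented above is not constant across the network: in the constructor below it is scaled as drop_path_prob * stage_block_id / total_stage_blocks, so later blocks are dropped more often. A standalone sketch of that schedule, using the efficientnet_b0 stage depths listed in the same source:

drop_path_prob = 0.2
num_layers_per_stage = [1, 2, 2, 3, 3, 4, 1]   # efficientnet_b0

total = sum(num_layers_per_stage)
block_id = 0
for stage, n in enumerate(num_layers_per_stage):
    # per-block stochastic-depth probability within this stage
    probs = [round(drop_path_prob * (block_id + k) / total, 3) for k in range(n)]
    block_id += n
    print(f"stage {stage}: {probs}")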
+ Source code in mindcv/models/efficientnet.py, lines 275-476
class EfficientNet(nn.Cell):
+    """
+    EfficientNet architecture.
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        arch (str): The name of the model.
+        dropout_rate (float): The dropout rate of efficientnet.
+        width_mult (float): The width multiplier applied to channel counts. Default: 1.0.
+        depth_mult (float): The depth multiplier applied to the number of layers per stage. Default: 1.0.
+        in_channels (int): The number of input channels. Default: 3.
+        num_classes (int): The number of classes. Default: 1000.
+        inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]], optional): The settings of block.
+            Default: None.
+        drop_path_prob (float): The drop path rate of MBConv. Default: 0.2.
+        norm_layer (nn.Cell, optional): The normalization layer. Default: None.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 1000)`.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        dropout_rate: float,
+        width_mult: float = 1.0,
+        depth_mult: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        inverted_residual_setting: Optional[Sequence[Union[MBConvConfig, FusedMBConvConfig]]] = None,
+        drop_path_prob: float = 0.2,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        self.last_channel = None
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+            if width_mult >= 1.6:
+                norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.99)
+
+        layers: List[nn.Cell] = []
+
+        if not inverted_residual_setting:
+            if arch.startswith("efficientnet_b"):
+                bneck_conf = partial(MBConvConfig, width_cnf=width_mult, depth_cnf=depth_mult)
+                inverted_residual_setting = [
+                    bneck_conf(1, 3, 1, 32, 16, 1),
+                    bneck_conf(6, 3, 2, 16, 24, 2),
+                    bneck_conf(6, 5, 2, 24, 40, 2),
+                    bneck_conf(6, 3, 2, 40, 80, 3),
+                    bneck_conf(6, 5, 1, 80, 112, 3),
+                    bneck_conf(6, 5, 2, 112, 192, 4),
+                    bneck_conf(6, 3, 1, 192, 320, 1),
+                ]
+            elif arch.startswith("efficientnet_v2_s"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 2),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 4),
+                    FusedMBConvConfig(4, 3, 2, 48, 64, 4),
+                    MBConvConfig(4, 3, 2, 64, 128, 6),
+                    MBConvConfig(6, 3, 1, 128, 160, 9),
+                    MBConvConfig(6, 3, 2, 160, 256, 15),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_m"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 3),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 5),
+                    FusedMBConvConfig(4, 3, 2, 48, 80, 5),
+                    MBConvConfig(4, 3, 2, 80, 160, 7),
+                    MBConvConfig(6, 3, 1, 160, 176, 14),
+                    MBConvConfig(6, 3, 2, 176, 304, 18),
+                    MBConvConfig(6, 3, 1, 304, 512, 5),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_l"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 7),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 7),
+                    MBConvConfig(4, 3, 2, 96, 192, 10),
+                    MBConvConfig(6, 3, 1, 192, 224, 19),
+                    MBConvConfig(6, 3, 2, 224, 384, 25),
+                    MBConvConfig(6, 3, 1, 384, 640, 7),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_xl"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 8),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 8),
+                    MBConvConfig(4, 3, 2, 96, 192, 16),
+                    MBConvConfig(6, 3, 1, 192, 256, 24),
+                    MBConvConfig(6, 3, 2, 256, 512, 32),
+                    MBConvConfig(6, 3, 1, 512, 640, 8),
+                ]
+                self.last_channel = 1280
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.extend([
+            nn.Conv2d(in_channels, firstconv_output_channels, kernel_size=3, stride=2),
+            norm_layer(firstconv_output_channels),
+            Swish(),
+        ])
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=firstconv_output_channels, reduction=total_reduction,
+                                  name=f'features.{len(layers) - 1}')]
+
+        # building MBConv blocks
+        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
+        stage_block_id = 0
+
+        # each cnf holds the configuration of one stage of blocks
+        for cnf in inverted_residual_setting:
+            stage: List[nn.Cell] = []
+
+            # cnf.num_layers is the number of times this block is repeated within the stage
+            for _ in range(cnf.num_layers):
+                # copy to avoid modifications. shallow copy is enough
+                block_cnf = copy.copy(cnf)
+
+                block = MBConv
+
+                if "FusedMBConvConfig" in str(type(block_cnf)):
+                    block = FusedMBConv
+
+                # overwrite config if this is not the first block in the stage
+                if stage:
+                    block_cnf.input_channels = block_cnf.out_channels
+                    block_cnf.stride = 1
+
+                # scale the stochastic depth (drop path) probability with the depth of the block in the network
+                sd_prob = drop_path_prob * float(stage_block_id) / total_stage_blocks
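+                # the probability grows linearly from 0 (first block) towards drop_path_prob (last block);
+                # e.g. with drop_path_prob=0.2 and 16 blocks in total (the B0 setting),
+                # the block with stage_block_id=8 gets sd_prob = 0.2 * 8 / 16 = 0.1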
+
+                total_reduction *= block_cnf.stride
+
+                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage_block_id += 1
+
+            layers.append(nn.SequentialCell(stage))
+
+            self.feature_info.append(dict(chs=cnf.out_channels, reduction=total_reduction,
+                                          name=f'features.{len(layers) - 1}'))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = self.last_channel if self.last_channel is not None else 4 * lastconv_input_channels
+        layers.extend([
+            nn.Conv2d(lastconv_input_channels, lastconv_output_channels, kernel_size=1),
+            norm_layer(lastconv_output_channels),
+            Swish(),
+        ])
+
+        self.feature_info.append(dict(chs=lastconv_output_channels, reduction=total_reduction,
+                                      name=f'features.{len(layers) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(layers)
+        self.avgpool = GlobalAvgPooling()
+        self.dropout = Dropout(p=dropout_rate)
+        self.mlp_head = nn.Dense(lastconv_output_channels, num_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+
+        x = self.avgpool(x)
+
+        if self.training:
+            x = self.dropout(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        """construct"""
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                init_range = 1.0 / np.sqrt(cell.weight.shape[0])
+                cell.weight.set_data(weight_init.initializer(Uniform(init_range), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            if isinstance(cell, nn.Conv2d):
+                out_channel, _, kernel_size_h, kernel_size_w = cell.weight.shape
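+                # He (fan-out) initialization: stddev = sqrt(2 / (out_channels * kernel_h * kernel_w))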
+                stddev = np.sqrt(2 / int(out_channel * kernel_size_h * kernel_size_w))
+                cell.weight.set_data(
+                    weight_init.initializer(Normal(sigma=stddev), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.efficientnet.EfficientNet.construct(x) + +

+ + +
+ +

construct

+ +
+ Source code in mindcv/models/efficientnet.py +
456-459
def construct(self, x: Tensor) -> Tensor:
+    """construct"""
+    x = self.forward_features(x)
+    return self.forward_head(x)
+
+
+
+ +
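Since construct just chains forward_features and forward_head, the two halves can also be called separately, e.g. to reuse the network as a pooled-feature extractor. A minimal sketch, assuming a 224x224 RGB input and the efficientnet_b0 factory documented below:

    import numpy as np
    import mindspore as ms
    from mindcv.models.efficientnet import efficientnet_b0

    net = efficientnet_b0(pretrained=False)
    net.set_train(False)                      # inference mode: the dropout in forward_features is skipped
    x = ms.Tensor(np.zeros((1, 3, 224, 224)), ms.float32)

    feats = net.forward_features(x)           # globally pooled features, 1280-dim for B0 (4 * 320)
    logits = net.forward_head(feats)          # classification logits, shape (1, 1000)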
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B0 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
497-514
@register_model
+def efficientnet_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B0 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b0", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
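Beyond pretrained, the factory's num_classes and in_channels arguments re-shape the classifier head and the stem convolution, so adapting the model to other data is a one-liner. A brief, illustrative sketch (the 10-class grayscale setup is just an example):

    from mindcv.models.efficientnet import efficientnet_b0

    # hypothetical grayscale, 10-class setup: 1-channel stem and a 10-way head
    net = efficientnet_b0(pretrained=False, num_classes=10, in_channels=1)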
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B1 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
517-534
@register_model
+def efficientnet_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B1 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b1", 1.0, 1.1, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B2 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
537-554
@register_model
+def efficientnet_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B2 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b2", 1.1, 1.2, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B3 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
557-574
@register_model
+def efficientnet_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B3 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b3", 1.2, 1.4, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B4 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
577-594
@register_model
+def efficientnet_b4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B4 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b4", 1.4, 1.8, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B5 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
597-614
@register_model
+def efficientnet_b5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B5 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b5", 1.6, 2.2, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b6(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B6 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
617-634
@register_model
+def efficientnet_b6(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B6 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b6", 1.8, 2.6, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b7(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B7 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
637-654
@register_model
+def efficientnet_b7(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B7 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b7", 2.0, 3.1, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-L architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
697-714
@register_model
+def efficientnet_v2_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-L architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_l", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-M architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
677-694
@register_model
+def efficientnet_v2_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-M architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_m", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-S architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
657-674
@register_model
+def efficientnet_v2_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-S architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_s", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_xl(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-XL architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
  • x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
717-734
@register_model
+def efficientnet_v2_xl(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-XL architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_xl", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
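Unlike the B0-B7 factories, all EfficientNetV2 factories above pass width_mult = depth_mult = 1.0 to _efficientnet, because each V2 size has its stage table written out explicitly in EfficientNet.__init__ rather than being derived by compound scaling. A quick sketch comparing two sizes (parameter counting via trainable_params is assumed to behave as in standard MindSpore Cells):

    from mindcv.models.efficientnet import efficientnet_v2_s, efficientnet_v2_l

    for factory in (efficientnet_v2_s, efficientnet_v2_l):
        net = factory(pretrained=False)
        n_params = sum(p.size for p in net.trainable_params())
        print(factory.__name__, f"{n_params / 1e6:.1f}M parameters")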

features

+

ghostnet

+ + +
+ + + +

+ mindcv.models.ghostnet.GhostNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GhostNet model class, based on +"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
width +
+

base width of hidden channel in blocks. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

the dropout probability applied to the features before classification. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
+ +
+ Source code in mindcv/models/ghostnet.py +
177-295
class GhostNet(nn.Cell):
+    r"""GhostNet model class, based on
+    `"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>`_.
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        width: base width of hidden channel in blocks. Default: 1.0.
+        in_channels: number of input channels. Default: 3.
+        drop_rate: the dropout probability applied to the features before classification. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        width: float = 1.0,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        # setting of inverted residual blocks
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        self.cfgs = [
+            # k: kernel size, t: expansion (hidden) channels, c: output channels, SE: squeeze-excite ratio, s: stride
+            # stage1
+            [[3, 16, 16, 0, 1]],
+            # stage2
+            [[3, 48, 24, 0, 2]],
+            [[3, 72, 24, 0, 1]],
+            # stage3
+            [[5, 72, 40, 0.25, 2]],
+            [[5, 120, 40, 0.25, 1]],
+            # stage4
+            [[3, 240, 80, 0, 2]],
+            [[3, 200, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 480, 112, 0.25, 1],
+             [3, 672, 112, 0.25, 1]
+             ],
+            # stage5
+            [[5, 672, 160, 0.25, 2]],
+            [[5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1],
+             [5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1]
+             ]
+        ]
+
+        # building first layer
+        stem_chs = make_divisible(16 * width, 4)
+        self.conv_stem = nn.Conv2d(in_channels, stem_chs, 3, 2, pad_mode="pad", padding=1, has_bias=False)
+        self.bn1 = nn.BatchNorm2d(stem_chs)
+        self.act1 = nn.ReLU()
+        prev_chs = stem_chs
+
+        # building inverted residual blocks
+        stages = []
+        for cfg in self.cfgs:
+            layers = []
+            for k, exp_size, c, se_ratio, s in cfg:
+                out_chs = make_divisible(c * width, 4)
+                mid_chs = make_divisible(exp_size * width, 4)
+                layers.append(GhostBottleneck(prev_chs, mid_chs, out_chs, k, s, se_ratio=se_ratio))
+                prev_chs = out_chs
+            stages.append(nn.SequentialCell(layers))
+
+        out_chs = make_divisible(exp_size * width, 4)
+        stages.append(ConvBnAct(prev_chs, out_chs, 1))
+        prev_chs = out_chs
+
+        self.blocks = nn.SequentialCell(stages)
+
+        # building last several layers
+        self.num_features = out_chs = 1280
+        self.global_pool = GlobalAvgPooling(keep_dims=True)
+        self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, pad_mode="pad", padding=0, has_bias=True)
+        self.act2 = nn.ReLU()
+        self.flatten = nn.Flatten()
+        if self.drop_rate > 0.0:
+            self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(out_chs, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.blocks(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.conv_head(x)
+        x = self.act2(x)
+        x = self.flatten(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-0.5x

+ +
+ Source code in mindcv/models/ghostnet.py +
298-307
@register_model
+def ghostnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-0.5x """
+    default_cfg = default_cfgs["ghostnet_050"]
+    model = GhostNet(width=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.0x

+ +
+ Source code in mindcv/models/ghostnet.py +
310-319
@register_model
+def ghostnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.0x """
+    default_cfg = default_cfgs["ghostnet_100"]
+    model = GhostNet(width=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.3x

+ +
+ Source code in mindcv/models/ghostnet.py +
322-331
@register_model
+def ghostnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.3x """
+    default_cfg = default_cfgs["ghostnet_130"]
+    model = GhostNet(width=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
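The three factories above only differ in the width multiplier (0.5, 1.0, 1.3) passed to GhostNet. A minimal usage sketch, assuming the usual 224x224 ImageNet-style input:

    import numpy as np
    import mindspore as ms
    from mindcv.models.ghostnet import ghostnet_100

    net = ghostnet_100(pretrained=False, num_classes=1000)
    net.set_train(False)                      # disables the classifier dropout at inference time
    x = ms.Tensor(np.zeros((1, 3, 224, 224)), ms.float32)
    logits = net(x)                           # shape (1, 1000)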

hrnet

+ + +
+ + + +

+ mindcv.models.hrnet.HRNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

HRNet Backbone, based on +"Deep High-Resolution Representation Learning for Visual Recognition" +<https://arxiv.org/abs/1908.07919>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
stage_cfg +
+

Configuration of the stages. It accepts a dictionary storing the detailed config of each stage, which includes num_modules, num_branches, block, num_blocks, num_channels. For a detailed example, please check the implementation of hrnet_w32 and hrnet_w48.

+
+

+ + TYPE: + Dict[str, Dict[str, int]] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/hrnet.py +
357-681
class HRNet(nn.Cell):
+    r"""HRNet Backbone, based on
+    `"Deep High-Resolution Representation Learning for Visual Recognition"
+    <https://arxiv.org/abs/1908.07919>`_.
+
+    Args:
+        stage_cfg: Configuration of the stages. It accepts a dictionary
+            storing the detailed config of each stage, which includes `num_modules`,
+            `num_branches`, `block`, `num_blocks`, `num_channels`. For a detailed example,
+            please check the implementation of `hrnet_w32` and `hrnet_w48`.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: Number of channels of the input. Default: 3.
+    """
+
+    blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
+
+    def __init__(
+        self,
+        stage_cfg: Dict[str, Dict[str, int]],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+
+        self.stage_cfg = stage_cfg
+        # stem net
+        self.conv1 = nn.Conv2d(
+            in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn1 = nn.BatchNorm2d(64)
+        self.conv2 = nn.Conv2d(
+            64, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn2 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU()
+
+        # stage 1
+        self.stage1_cfg = self.stage_cfg["stage1"]
+        num_channels = self.stage1_cfg["num_channels"][0]
+        num_blocks = self.stage1_cfg["num_blocks"][0]
+        block = self.blocks_dict[self.stage1_cfg["block"]]
+        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
+
+        # stage 2
+        self.stage2_cfg = self.stage_cfg["stage2"]
+        num_channels = self.stage2_cfg["num_channels"]
+        block = self.blocks_dict[self.stage2_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition1, self.transition1_flags = self._make_transition_layer(
+            [256], num_channels
+        )
+        self.stage2, pre_stage_channels = self._make_stage(
+            self.stage2_cfg, num_channels
+        )
+
+        # stage 3
+        self.stage3_cfg = self.stage_cfg["stage3"]
+        num_channels = self.stage3_cfg["num_channels"]
+        block = self.blocks_dict[self.stage3_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition2, self.transition2_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage3, pre_stage_channels = self._make_stage(
+            self.stage3_cfg, num_channels
+        )
+
+        # stage 4
+        self.stage4_cfg = self.stage_cfg["stage4"]
+        num_channels = self.stage4_cfg["num_channels"]
+        block = self.blocks_dict[self.stage4_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+        self.transition3, self.transition3_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage4, pre_stage_channels = self._make_stage(
+            self.stage4_cfg, num_channels
+        )
+
+        # head
+        self.pool = GlobalAvgPooling()
+        self.incre_modules, self.downsample_modules, self.final_layer = self._make_head(
+            pre_stage_channels
+        )
+        self.classifier = nn.Dense(2048, num_classes)
+
+    def _make_head(self, pre_stage_channels: List[int]):
+        head_block = Bottleneck
+        head_channels = [32, 64, 128, 256]
+
+        # increase the number of channels on each resolution
+        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
+        incre_modules = list()
+        for i, channels in enumerate(pre_stage_channels):
+            incre_module = self._make_layer(
+                head_block, channels, head_channels[i], 1, stride=1
+            )
+            incre_modules.append(incre_module)
+        incre_modules = nn.CellList(incre_modules)
+
+        # downsample modules
+        downsamp_modules = []
+        for i in range(len(pre_stage_channels) - 1):
+            in_channels = head_channels[i] * head_block.expansion
+            out_channels = head_channels[i + 1] * head_block.expansion
+
+            downsamp_module = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=3,
+                    stride=2,
+                    pad_mode="pad",
+                    padding=1,
+                ),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(),
+            )
+
+            downsamp_modules.append(downsamp_module)
+        downsamp_modules = nn.CellList(downsamp_modules)
+
+        final_layer = nn.SequentialCell(
+            nn.Conv2d(
+                in_channels=head_channels[3] * head_block.expansion,
+                out_channels=2048,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            ),
+            nn.BatchNorm2d(2048),
+            nn.ReLU(),
+        )
+
+        return incre_modules, downsamp_modules, final_layer
+
+    def _make_transition_layer(
+        self, num_channels_pre_layer: List[int], num_channels_cur_layer: List[int]
+    ) -> Tuple[nn.CellList, List[bool]]:
+        num_branches_cur = len(num_channels_cur_layer)
+        num_branches_pre = len(num_channels_pre_layer)
+
+        transition_layers = []
+        transition_layers_flags = []
+        for i in range(num_branches_cur):
+            if i < num_branches_pre:
+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+                    transition_layers.append(
+                        nn.SequentialCell(
+                            nn.Conv2d(
+                                num_channels_pre_layer[i],
+                                num_channels_cur_layer[i],
+                                kernel_size=3,
+                                padding=1,
+                                pad_mode="pad",
+                            ),
+                            nn.BatchNorm2d(num_channels_cur_layer[i]),
+                            nn.ReLU(),
+                        )
+                    )
+                    transition_layers_flags.append(True)
+                else:
+                    transition_layers.append(IdentityCell())
+                    transition_layers_flags.append(False)
+            else:
+                conv3x3s = []
+                for j in range(i + 1 - num_branches_pre):
+                    inchannels = num_channels_pre_layer[-1]
+                    outchannels = (
+                        num_channels_cur_layer[i]
+                        if j == i - num_branches_pre
+                        else inchannels
+                    )
+                    conv3x3s.append(
+                        nn.SequentialCell(
+                            [
+                                nn.Conv2d(
+                                    inchannels,
+                                    outchannels,
+                                    kernel_size=3,
+                                    stride=2,
+                                    padding=1,
+                                    pad_mode="pad",
+                                ),
+                                nn.BatchNorm2d(outchannels),
+                                nn.ReLU(),
+                            ]
+                        )
+                    )
+                transition_layers.append(nn.SequentialCell(conv3x3s))
+                transition_layers_flags.append(True)
+
+        return nn.CellList(transition_layers), transition_layers_flags
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        in_channels: int,
+        out_channels: int,
+        blocks: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or in_channels != out_channels * block.expansion:
+            downsample = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                ),
+                nn.BatchNorm2d(out_channels * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(in_channels, out_channels, stride, down_sample=downsample))
+        for _ in range(1, blocks):
+            layers.append(block(out_channels * block.expansion, out_channels))
+
+        return nn.SequentialCell(layers)
+
+    def _make_stage(
+        self,
+        layer_config: Dict[str, int],
+        num_inchannels: int,
+        multi_scale_output: bool = True,
+    ) -> Tuple[nn.SequentialCell, List[int]]:
+        num_modules = layer_config["num_modules"]
+        num_branches = layer_config["num_branches"]
+        num_blocks = layer_config["num_blocks"]
+        num_channels = layer_config["num_channels"]
+        block = self.blocks_dict[layer_config["block"]]
+
+        modules = []
+        for i in range(num_modules):
+            # multi_scale_output is only applied to the last module
+            if not multi_scale_output and i == num_modules - 1:
+                reset_multi_scale_output = False
+            else:
+                reset_multi_scale_output = True
+
+            modules.append(
+                HRModule(
+                    num_branches,
+                    block,
+                    num_blocks,
+                    num_inchannels,
+                    num_channels,
+                    reset_multi_scale_output,
+                )
+            )
+            num_inchannels = modules[-1].num_inchannels
+
+        return nn.SequentialCell(modules), num_inchannels
+
+    def forward_features(self, x: Tensor) -> List[Tensor]:
+        """Perform the feature extraction.
+
+        Args:
+            x: Tensor
+
+        Returns:
+            Extracted feature
+        """
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
+
+        # stage 1
+        x = self.layer1(x)
+
+        # stage 2
+        x_list = []
+        for i in range(self.stage2_cfg["num_branches"]):
+            if self.transition1_flags[i]:
+                x_list.append(self.transition1[i](x))
+            else:
+                x_list.append(x)
+        y_list = self.stage2(x_list)
+
+        # stage 3
+        x_list = []
+        for i in range(self.stage3_cfg["num_branches"]):
+            if self.transition2_flags[i]:
+                x_list.append(self.transition2[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y_list = self.stage3(x_list)
+
+        # stage 4
+        x_list = []
+        for i in range(self.stage4_cfg["num_branches"]):
+            if self.transition3_flags[i]:
+                x_list.append(self.transition3[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y = self.stage4(x_list)
+
+        return y
+
+    def forward_head(self, x: List[Tensor]) -> Tensor:
+        y = self.incre_modules[0](x[0])
+        for i in range(len(self.downsample_modules)):
+            y = self.incre_modules[i + 1](x[i + 1]) + self.downsample_modules[i](y)
+
+        y = self.final_layer(y)
+        y = self.pool(y)
+        y = self.classifier(y)
+        return y
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.hrnet.HRNet.forward_features(x) + +

+ + +
+ +

Perform the feature extraction.

+ + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
x +
+

Tensor

+
+

+ + TYPE: + Tensor + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + List[Tensor] + + +
+

Extracted feature

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
620-666
def forward_features(self, x: Tensor) -> List[Tensor]:
+    """Perform the feature extraction.
+
+    Args:
+        x: Tensor
+
+    Returns:
+        Extracted feature
+    """
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.conv2(x)
+    x = self.bn2(x)
+    x = self.relu(x)
+
+    # stage 1
+    x = self.layer1(x)
+
+    # stage 2
+    x_list = []
+    for i in range(self.stage2_cfg["num_branches"]):
+        if self.transition1_flags[i]:
+            x_list.append(self.transition1[i](x))
+        else:
+            x_list.append(x)
+    y_list = self.stage2(x_list)
+
+    # stage 3
+    x_list = []
+    for i in range(self.stage3_cfg["num_branches"]):
+        if self.transition2_flags[i]:
+            x_list.append(self.transition2[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y_list = self.stage3(x_list)
+
+    # stage 4
+    x_list = []
+    for i in range(self.stage4_cfg["num_branches"]):
+        if self.transition3_flags[i]:
+            x_list.append(self.transition3[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y = self.stage4(x_list)
+
+    return y
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.hrnet.hrnet_w32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get HRNet with width=32 model. +Refer to the base class models.HRNet for more details.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether the model is pretrained. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of input channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Union[HRNet, HRNetFeatures] + + +
+

HRNet model

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
756-803
@register_model
+def hrnet_w32(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=32 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w32"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[32, 64],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[32, 64, 128],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[32, 64, 128, 256],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.hrnet.hrnet_w48(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get HRNet with width=48 model. +Refer to the base class models.HRNet for more details.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether the model is pretrained. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of input channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Union[HRNet, HRNetFeatures] + + +
+

HRNet model

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
806-853
@register_model
+def hrnet_w48(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=48 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w48"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[48, 96],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[48, 96, 192],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[48, 96, 192, 384],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
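hrnet_w32 and hrnet_w48 share the same stage layout and differ only in the per-branch channel widths (32/64/128/256 vs. 48/96/192/384). Because forward_features returns one tensor per branch, the backbone can be probed as in this sketch (the exact spatial sizes depend on the input resolution):

    import numpy as np
    import mindspore as ms
    from mindcv.models.hrnet import hrnet_w32

    net = hrnet_w32(pretrained=False)
    net.set_train(False)
    x = ms.Tensor(np.zeros((1, 3, 224, 224)), ms.float32)

    feats = net.forward_features(x)       # list with one feature map per branch
    for f in feats:
        print(f.shape)                    # four resolutions, from 1/4 down to 1/32 of the input
    logits = net.forward_head(feats)      # shape (1, 1000)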

inceptionv3

+ + +
+ + + +

+ mindcv.models.inceptionv3.InceptionV3 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Inception v3 model architecture from +"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>_.

+

.. note:: + Important: In contrast to the other models the inception_v3 expects tensors with a size of + N x 3 x 299 x 299, so ensure your images are sized accordingly.

PARAMETER     TYPE     DEFAULT    DESCRIPTION
num_classes   int      1000       Number of classification classes.
aux_logits    bool     True       Use the auxiliary classifier or not.
in_channels   int      3          Number of input channels.
drop_rate     float    0.2        Dropout rate of the layer before the main classifier.
Source code in mindcv/models/inceptionv3.py, lines 224-325:
class InceptionV3(nn.Cell):
+    r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>`_.
+
+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        aux_logits: use auxiliary classifier or not. Default: False.
+        in_channels: number the channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        aux_logits: bool = True,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        self.aux_logits = aux_logits
+        self.conv1a = BasicConv2d(in_channels, 32, kernel_size=3, stride=2, pad_mode="valid")
+        self.conv2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode="valid")
+        self.conv2b = BasicConv2d(32, 64, kernel_size=3, stride=1)
+        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.conv3b = BasicConv2d(64, 80, kernel_size=1)
+        self.conv4a = BasicConv2d(80, 192, kernel_size=3, pad_mode="valid")
+        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.inception5b = InceptionA(192, pool_features=32)
+        self.inception5c = InceptionA(256, pool_features=64)
+        self.inception5d = InceptionA(288, pool_features=64)
+        self.inception6a = InceptionB(288)
+        self.inception6b = InceptionC(768, channels_7x7=128)
+        self.inception6c = InceptionC(768, channels_7x7=160)
+        self.inception6d = InceptionC(768, channels_7x7=160)
+        self.inception6e = InceptionC(768, channels_7x7=192)
+        if self.aux_logits:
+            self.aux = InceptionAux(768, num_classes)
+        self.inception7a = InceptionD(768)
+        self.inception7b = InceptionE(1280)
+        self.inception7c = InceptionE(2048)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 2048
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_preaux(self, x: Tensor) -> Tensor:
+        x = self.conv1a(x)
+        x = self.conv2a(x)
+        x = self.conv2b(x)
+        x = self.maxpool1(x)
+        x = self.conv3b(x)
+        x = self.conv4a(x)
+        x = self.maxpool2(x)
+        x = self.inception5b(x)
+        x = self.inception5c(x)
+        x = self.inception5d(x)
+        x = self.inception6a(x)
+        x = self.inception6b(x)
+        x = self.inception6c(x)
+        x = self.inception6d(x)
+        x = self.inception6e(x)
+        return x
+
+    def forward_postaux(self, x: Tensor) -> Tensor:
+        x = self.inception7a(x)
+        x = self.inception7b(x)
+        x = self.inception7c(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.forward_preaux(x)
+        x = self.forward_postaux(x)
+        return x
+
+    def construct(self, x: Tensor) -> Union[Tensor, Tuple[Tensor, Tensor]]:
+        x = self.forward_preaux(x)
+        if self.training and self.aux_logits:
+            aux = self.aux(x)
+        else:
+            aux = None
+        x = self.forward_postaux(x)
+
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+
+        if self.training and self.aux_logits:
+            return x, aux
+        return x
+
+
+ + + +
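As the note and the construct method above show, InceptionV3 expects N x 3 x 299 x 299 inputs and returns a (logits, aux_logits) pair only in training mode when aux_logits=True. A small sketch of both paths, assuming MindSpore is available:

import numpy as np
import mindspore as ms
from mindcv.models.inceptionv3 import InceptionV3

net = InceptionV3(num_classes=1000, aux_logits=True)
x = ms.Tensor(np.random.randn(2, 3, 299, 299), ms.float32)

net.set_train(True)
logits, aux = net(x)   # training mode: main and auxiliary logits
net.set_train(False)
logits = net(x)        # inference mode: main logits only, shape (2, 1000)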
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.inceptionv3.inception_v3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get InceptionV3 model.
Refer to the base class models.InceptionV3 for more details.

Source code in mindcv/models/inceptionv3.py, lines 328-338:
@register_model
+def inception_v3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV3:
+    """Get InceptionV3 model.
+    Refer to the base class `models.InceptionV3` for more details."""
+    default_cfg = default_cfgs["inception_v3"]
+    model = InceptionV3(num_classes=num_classes, aux_logits=True, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

inceptionv4

+ + +
+ + + +

mindcv.models.inceptionv4.InceptionV4

Bases: nn.Cell

Inception v4 model architecture from
"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>_.

PARAMETER     TYPE     DEFAULT    DESCRIPTION
num_classes   int      1000       Number of classification classes.
in_channels   int      3          Number of input channels.
drop_rate     float    0.2        Dropout rate of the layer before the main classifier.
Source code in mindcv/models/inceptionv4.py, lines 253-307:
class InceptionV4(nn.Cell):
+    r"""Inception v4 model architecture from
+    `"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>`_.  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number the channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        blocks = [Stem(in_channels)]
+        for _ in range(4):
+            blocks.append(InceptionA())
+        blocks.append(ReductionA())
+        for _ in range(7):
+            blocks.append(InceptionB())
+        blocks.append(ReductionB())
+        for _ in range(3):
+            blocks.append(InceptionC())
+        self.features = nn.SequentialCell(blocks)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 1536
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.inceptionv4.inception_v4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get InceptionV4 model.
Refer to the base class models.InceptionV4 for more details.

Source code in mindcv/models/inceptionv4.py, lines 310-320:
@register_model
+def inception_v4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV4:
+    """Get InceptionV4 model.
+    Refer to the base class `models.InceptionV4` for more details."""
+    default_cfg = default_cfgs["inception_v4"]
+    model = InceptionV4(num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
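The inception_v3 and inception_v4 builders above only differ in which class and default_cfg they wire up. A brief sketch, with pretrained=False so no checkpoint download is attempted:

from mindcv.models.inceptionv3 import inception_v3
from mindcv.models.inceptionv4 import inception_v4

v3 = inception_v3(pretrained=False, num_classes=100)
v4 = inception_v4(pretrained=False, num_classes=100)
print(type(v3).__name__, type(v4).__name__)  # InceptionV3 InceptionV4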

mixnet

+ + +
+ + + +

mindcv.models.mixnet.MixNet

Bases: nn.Cell

MixNet model class, based on
"MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>_

PARAMETER         TYPE     DEFAULT    DESCRIPTION
arch              str      'small'    Size of the architecture: "small", "medium" or "large".
num_classes       int      1000       Number of classification classes.
in_channels       int      3          Number of input channels.
feature_size      int      1536       Number of channels of the output features.
drop_rate         float    0.2        Dropout rate for the classifier.
depth_multiplier  float    1.0        Expansion coefficient of channels.
Source code in mindcv/models/mixnet.py, lines 227-384:
class MixNet(nn.Cell):
+    r"""MixNet model class, based on
+    `"MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>`_
+
+    Args:
+        arch: size of the architecture. "small", "medium" or "large". Default: "small".
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of the channels of the input. Default: 3.
+        feature_size: numbet of the channels of the output features. Default: 1536.
+        drop_rate: rate of dropout for classifier. Default: 0.2.
+        depth_multiplier: expansion coefficient of channels. Default: 1.0.
+    """
+
+    def __init__(
+        self,
+        arch: str = "small",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        feature_size: int = 1536,
+        drop_rate: float = 0.2,
+        depth_multiplier: float = 1.0
+    ) -> None:
+        super(MixNet, self).__init__()
+        if arch == "small":
+            block_configs = [
+                [16, 16, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [16, 24, [3], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [24, 24, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [24, 40, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1, 1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3, 5, 7], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9, 11], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            stem_channels = 16
+            drop_rate = drop_rate
+        else:
+            block_configs = [
+                [24, 24, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [24, 32, [3, 5, 7], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [32, 32, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [32, 40, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3], [1], [1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            if arch == "medium":
+                stem_channels = 24
+                drop_rate = drop_rate
+            elif arch == "large":
+                stem_channels = 24
+                depth_multiplier *= 1.3
+                drop_rate = drop_rate
+            else:
+                raise ValueError(f"Unsupported model type {arch}")
+
+        if depth_multiplier != 1.0:
+            stem_channels = _roundchannels(stem_channels * depth_multiplier)
+
+            for i, conf in enumerate(block_configs):
+                conf_ls = list(conf)
+                conf_ls[0] = _roundchannels(conf_ls[0] * depth_multiplier)
+                conf_ls[1] = _roundchannels(conf_ls[1] * depth_multiplier)
+                block_configs[i] = tuple(conf_ls)
+
+        # stem convolution
+        self.stem_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, stem_channels, 3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(stem_channels),
+            nn.ReLU()
+        ])
+
+        # building MixNet blocks
+        layers = []
+        for inc, outc, k, ek, pk, s, er, ac, se in block_configs:
+            layers.append(MixNetBlock(
+                inc,
+                outc,
+                kernel_size=k,
+                expand_ksize=ek,
+                project_ksize=pk,
+                stride=s,
+                expand_ratio=er,
+                activation=ac,
+                se_ratio=se
+            ))
+        self.layers = nn.SequentialCell(layers)
+
+        # head
+        self.head_conv = nn.SequentialCell([
+            nn.Conv2d(block_configs[-1][1], feature_size, 1, pad_mode="pad", padding=0),
+            nn.BatchNorm2d(feature_size),
+            nn.ReLU()
+        ])
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(feature_size, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(math.sqrt(2.0 / fan_out)),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Uniform(1.0 / math.sqrt(cell.weight.shape[0])),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem_conv(x)
+        x = self.layers(x)
+        x = self.head_conv(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.mixnet.mixnet_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mixnet.py, lines 409-417:
@register_model
+def mixnet_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_l"]
+    model = MixNet(arch="large", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mixnet.mixnet_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mixnet.py, lines 398-406:
@register_model
+def mixnet_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_m"]
+    model = MixNet(arch="medium", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mixnet.mixnet_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mixnet.py, lines 387-395:
@register_model
+def mixnet_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_s"]
+    model = MixNet(arch="small", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
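mixnet_s, mixnet_m and mixnet_l differ only in the arch string they pass to MixNet (for "large" the class additionally multiplies depth_multiplier by 1.3, as the constructor above shows). A minimal sketch:

from mindcv.models.mixnet import mixnet_l, mixnet_m, mixnet_s

small = mixnet_s(pretrained=False, num_classes=1000)   # arch="small"
medium = mixnet_m(pretrained=False, num_classes=1000)  # arch="medium"
large = mixnet_l(pretrained=False, num_classes=1000)   # arch="large", channels scaled by 1.3 internally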

mlpmixer

+ + +
+ + + +

mindcv.models.mlpmixer.MLPMixer

Bases: nn.Cell

MLP-Mixer model class, based on
"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>_

PARAMETER     TYPE           DEFAULT    DESCRIPTION
depth         int                       Number of MixerBlocks.
patch_size    int or tuple              Size of a single image patch.
n_patches     int                       Number of patches.
n_channels    int                       Channels (dimension) of a single embedded patch.
token_dim     int                       Hidden dim of the token-mixing MLP.
channel_dim   int                       Hidden dim of the channel-mixing MLP.
num_classes   int            1000       Number of classification classes.
in_channels                  3          Number of input channels.
Source code in mindcv/models/mlpmixer.py, lines 104-146:
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (int or tuple) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        num_classes (int) : number of classification classes.
+        in_channels: number the channels of the input. Default: 3.
+    """
+
+    def __init__(self, depth, patch_size, n_patches, n_channels, token_dim, channel_dim, num_classes=1000,
+                 in_channels=3):
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, num_classes)
+        self.mean = ops.ReduceMean()
+        self._initialize_weights()
+
+    def construct(self, x):
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        x = self.mean(x, 1)
+        return self.mlp_head(x)
+
+    def _initialize_weights(self):
+        # todo: implement weights init
+        pass
+
+
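In the constructor above, patches are embedded with a Conv2d whose kernel size and stride both equal patch_size, so n_patches has to equal (H // patch_size) * (W // patch_size). A quick check of that relation, assuming the 224x224 resolution implied by the ls values in the builders below:

def expected_n_patches(image_size: int, patch_size: int) -> int:
    # Non-overlapping square patches on a square image.
    assert image_size % patch_size == 0
    return (image_size // patch_size) ** 2

print(expected_n_patches(224, 16))  # 196 -> matches ls in mlp_mixer_*_p16
print(expected_n_patches(224, 32))  # 49  -> matches ls in mlp_mixer_*_p32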
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_b_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 203-214:
@register_model
+def mlp_mixer_b_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 16, 196, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_b_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 189-200:
@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_h_p14(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 245-256:
@register_model
+def mlp_mixer_h_p14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 32, 14, 256, 1280, 640, 5120
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_h_p14"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_l_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 231-242:
@register_model
+def mlp_mixer_l_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 16, 196, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_l_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 217-228:
@register_model
+def mlp_mixer_l_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 32, 49, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_s_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 175-186:
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mlpmixer.mlp_mixer_s_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py, lines 160-172:
@register_model
+def mlp_mixer_s_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    # number_of_layers, patch_resolution, length_of_sequence, hidden_size, mpl_dim_sequence, mpl_dim_channel
+    nl, pr, ls, hs, ds, dc = 8, 32, 49, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs,
+                     token_dim=ds, channel_dim=dc, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
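Each mlp_mixer_* builder above simply fixes the (depth, patch_size, n_patches, hidden size, token_dim, channel_dim) tuple before instantiating MLPMixer. A minimal sketch of creating one variant:

from mindcv.models.mlpmixer import mlp_mixer_s_p32

# 8 MixerBlocks, 32x32 patches, 49 patches per image, 512-dim patch embeddings.
net = mlp_mixer_s_p32(pretrained=False, num_classes=1000)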

mnasnet

+ + +
+ + + +

mindcv.models.mnasnet.Mnasnet

Bases: nn.Cell

MnasNet model architecture from
"MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>_.

PARAMETER     TYPE     DEFAULT    DESCRIPTION
alpha         float               Scale factor of model width.
in_channels   int      3          Number of input channels.
num_classes   int      1000       Number of classification classes.
drop_rate     float    0.2        Dropout rate of the layer before the main classifier.
Source code in mindcv/models/mnasnet.py, lines 81-177:
class Mnasnet(nn.Cell):
+    r"""MnasNet model architecture from
+    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>`_.
+
+    Args:
+        alpha: scale factor of model width.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        alpha: float,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        drop_rate: float = 0.2,
+    ):
+        super().__init__()
+
+        inverted_residual_setting = [
+            # t, c, n, s, k
+            [3, 24, 3, 2, 3],  # -> 56x56
+            [3, 40, 3, 2, 5],  # -> 28x28
+            [6, 80, 3, 2, 5],  # -> 14x14
+            [6, 96, 2, 1, 3],  # -> 14x14
+            [6, 192, 4, 2, 5],  # -> 7x7
+            [6, 320, 1, 1, 3],  # -> 7x7
+        ]
+
+        mid_channels = make_divisible(32 * alpha, 8)
+        input_channels = make_divisible(16 * alpha, 8)
+
+        features: List[nn.Cell] = [
+            nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, pad_mode="pad", padding=1,
+                      group=mid_channels),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, input_channels, kernel_size=1, stride=1),
+            nn.BatchNorm2d(input_channels, momentum=0.99, eps=1e-3),
+        ]
+
+        for t, c, n, s, k in inverted_residual_setting:
+            output_channels = make_divisible(c * alpha, 8)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channels,
+                                                 stride=stride, kernel_size=k, expand_ratio=t))
+                input_channels = output_channels
+
+        features.extend([
+            nn.Conv2d(input_channels, 1280, kernel_size=1, stride=1),
+            nn.BatchNorm2d(1280, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+        ])
+        self.features = nn.SequentialCell(features)
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(1280, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_out", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
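The alpha argument scales every channel count before it is rounded with make_divisible(..., 8). That helper is not part of this listing, so the sketch below uses the conventional round-to-the-nearest-multiple rule (with a 10% floor) purely as an assumption, to illustrate how the stem widths above come about:

def make_divisible_sketch(v: float, divisor: int = 8, min_value: int = None) -> int:
    # Assumed behaviour of the mindcv helper; not taken from this listing.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

for alpha in (0.5, 0.75, 1.0, 1.3, 1.4):
    print(alpha, make_divisible_sketch(32 * alpha), make_divisible_sketch(16 * alpha))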
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.mnasnet.mnasnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 0.5.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py, lines 180-190:
@register_model
+def mnasnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.5.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_050"]
+    model = Mnasnet(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mnasnet.mnasnet_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 0.75.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py, lines 193-203:
@register_model
+def mnasnet_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.75.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_075"]
+    model = Mnasnet(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mnasnet.mnasnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 1.0.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py, lines 206-216:
@register_model
+def mnasnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.0.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_100"]
+    model = Mnasnet(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mnasnet.mnasnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 1.3.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py, lines 219-229:
@register_model
+def mnasnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.3.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_130"]
+    model = Mnasnet(alpha=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mnasnet.mnasnet_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 1.4.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py, lines 232-242:
@register_model
+def mnasnet_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.4.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_140"]
+    model = Mnasnet(alpha=1.4, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
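The five registered variants above differ only in the alpha they pass to Mnasnet; picking one is a one-liner:

from mindcv.models.mnasnet import mnasnet_050, mnasnet_100, mnasnet_140

narrow = mnasnet_050(pretrained=False)  # alpha = 0.5
base = mnasnet_100(pretrained=False)    # alpha = 1.0
wide = mnasnet_140(pretrained=False)    # alpha = 1.4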

mobilenetv1

+ + +
+ + + +

mindcv.models.mobilenetv1.MobileNetV1

Bases: nn.Cell

MobileNetV1 model class, based on
"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>_

PARAMETER     TYPE     DEFAULT    DESCRIPTION
alpha         float    1.0        Scale factor of model width.
in_channels   int      3          Number of input channels.
num_classes   int      1000       Number of classification classes.
Source code in mindcv/models/mobilenetv1.py, lines 61-134:
class MobileNetV1(nn.Cell):
+    r"""MobileNetV1 model class, based on
+    `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_  # noqa: E501
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = int(32 * alpha)
+        # Setting of depth-wise separable conv
+        # c: number of output channel
+        # s: stride of depth-wise conv
+        block_setting = [
+            # c, s
+            [64, 1],
+            [128, 2],
+            [128, 1],
+            [256, 2],
+            [256, 1],
+            [512, 2],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [1024, 2],
+            [1024, 1],
+        ]
+
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU(),
+        ]
+        for c, s in block_setting:
+            output_channel = int(c * alpha)
+            features.append(depthwise_separable_conv(input_channels, output_channel, s))
+            input_channels = output_channel
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(input_channels, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.mobilenetv1.mobilenet_v1_025(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV1 model with width scaled by 0.25.
Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py, lines 137-148:
@register_model
+def mobilenet_v1_025(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.25.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_025"]
+    model = MobileNetV1(alpha=0.25, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv1.mobilenet_v1_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV1 model with width scaled by 0.5.
Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py, lines 151-162:
@register_model
+def mobilenet_v1_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.5.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_050"]
+    model = MobileNetV1(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv1.mobilenet_v1_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV1 model with width scaled by 0.75.
Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py, lines 165-176:
@register_model
+def mobilenet_v1_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_075"]
+    model = MobileNetV1(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv1.mobilenet_v1_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV1 model without width scaling.
Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py, lines 179-190:
@register_model
+def mobilenet_v1_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model without width scaling.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_100"]
+    model = MobileNetV1(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
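As with MnasNet, the mobilenet_v1_* builders only change alpha, which multiplies every channel count in block_setting. A short comparison sketch on a dummy input (224x224 is an illustrative size, not a constraint documented here):

import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv1 import mobilenet_v1_025, mobilenet_v1_100

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
tiny = mobilenet_v1_025(pretrained=False, num_classes=1000)
full = mobilenet_v1_100(pretrained=False, num_classes=1000)
print(tiny(x).shape, full(x).shape)  # both (1, 1000); the variants differ in width, not output shape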

mobilenetv2

+ + +
+ + + +

mindcv.models.mobilenetv2.MobileNetV2

Bases: nn.Cell

MobileNetV2 model class, based on
"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>_

PARAMETER      TYPE     DEFAULT    DESCRIPTION
alpha          float    1.0        Scale factor of model width.
round_nearest  int      8          Divisor of the make_divisible function.
in_channels    int      3          Number of input channels.
num_classes    int      1000       Number of classification classes.
Source code in mindcv/models/mobilenetv2.py, lines 163-259:
class MobileNetV2(nn.Cell):
+    r"""MobileNetV2 model class, based on
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(32 * alpha, round_nearest)
+        # Setting of inverted residual blocks.
+        # t: The expansion factor.
+        # c: Number of output channel.
+        # n: Number of block.
+        # s: First block stride.
+        inverted_residual_setting = [
+            # t, c, n, s
+            [1, 16, 1, 1],
+            [6, 24, 2, 2],
+            [6, 32, 3, 2],
+            [6, 64, 4, 2],
+            [6, 96, 3, 1],
+            [6, 160, 3, 2],
+            [6, 320, 1, 1],
+        ]
+        last_channels = make_divisible(1280 * max(1.0, alpha), round_nearest)
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU6(),
+        ]
+        # Building inverted residual blocks.
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = make_divisible(c * alpha, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channel, stride, expand_ratio=t))
+                input_channels = output_channel
+        # Building last point-wise layers.
+        features.extend([
+            nn.Conv2d(input_channels, last_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(last_channels),
+            nn.ReLU6(),
+        ])
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            Dropout(p=0.2),  # confirmed by paper authors
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
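To make the inverted_residual_setting above concrete, this sketch walks its (t, c, n, s) rows for alpha=1.0, where every channel count is already a multiple of round_nearest=8 and (assuming the usual rounding behaviour of make_divisible) passes through unchanged:

# t: expansion factor, c: output channels, n: repeats, s: stride of the first block in the stage
setting = [
    [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2],
    [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1],
]
in_ch = 32  # stem output width for alpha=1.0
for t, c, n, s in setting:
    print(f"{n} InvertedResidual block(s): {in_ch} -> {c}, expand x{t}, first stride {s}")
    in_ch = c
# The features then end with a 1x1 conv to last_channels=1280 before pooling and the classifier.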
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

mindcv.models.mobilenetv2.mobilenet_v2_035_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py, lines 542-553:
@register_model
+def mobilenet_v2_035_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_128"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv2.mobilenet_v2_035_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py, lines 528-539:
@register_model
+def mobilenet_v2_035_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_160"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv2.mobilenet_v2_035_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py, lines 514-525:
@register_model
+def mobilenet_v2_035_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_192"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

mindcv.models.mobilenetv2.mobilenet_v2_035_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py, lines 500-511:
@register_model
+def mobilenet_v2_035_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_224"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
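As the listings for the mobilenet_v2_035_* builders show, the constructed network is identical in every case (alpha=0.35); the resolution suffix only selects a different default_cfg, i.e. which pretrained checkpoint is used and at what input size it was trained. A short sketch:

from mindcv.models.mobilenetv2 import mobilenet_v2_035_128, mobilenet_v2_035_224

# Without pretrained weights these two are architecturally the same network.
a = mobilenet_v2_035_128(pretrained=False)
b = mobilenet_v2_035_224(pretrained=False)
# With pretrained=True they would load checkpoints trained at 128x128 and 224x224 respectively.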
+ + +
+ + + +

mindcv.models.mobilenetv2.mobilenet_v2_035_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py, lines 556-567:
@register_model
+def mobilenet_v2_035_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_96"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 472-483)
@register_model
+def mobilenet_v2_050_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_128"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 458-469)
@register_model
+def mobilenet_v2_050_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_160"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 444-455)
@register_model
+def mobilenet_v2_050_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_192"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 430-441)
@register_model
+def mobilenet_v2_050_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_224"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 486-497)
@register_model
+def mobilenet_v2_050_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_96"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 360-371)
@register_model
+def mobilenet_v2_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 402-413)
@register_model
+def mobilenet_v2_075_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_128"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 388-399)
@register_model
+def mobilenet_v2_075_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_160"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 374-385)
@register_model
+def mobilenet_v2_075_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_192"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 416-427)
@register_model
+def mobilenet_v2_075_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_96"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 290-301)
@register_model
+def mobilenet_v2_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 332-343)
@register_model
+def mobilenet_v2_100_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_128"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 318-329)
@register_model
+def mobilenet_v2_100_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_160"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 304-315)
@register_model
+def mobilenet_v2_100_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_192"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 346-357)
@register_model
+def mobilenet_v2_100_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_96"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_130_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 1.3 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 276-287)
@register_model
+def mobilenet_v2_130_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.3 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_130_224"]
+    model = MobileNetV2(alpha=1.3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 1.4 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py +
(lines 262-273)
@register_model
+def mobilenet_v2_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.4 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_140"]
+    model = MobileNetV2(alpha=1.4, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
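All of the mobilenet_v2_* factories above follow the same pattern: the function name encodes the width multiplier and the intended input resolution (for example, mobilenet_v2_075_160 is alpha=0.75 at 160x160), and each one instantiates MobileNetV2 and optionally loads pretrained weights. A minimal usage sketch, assuming MindSpore and MindCV are installed and inputs are NCHW float32 tensors:

```python
import numpy as np
import mindspore as ms

from mindcv.models.mobilenetv2 import mobilenet_v2_100, mobilenet_v2_075_160

# Full-width MobileNetV2 (alpha=1.0) with a 1000-class head, randomly initialized.
net = mobilenet_v2_100(pretrained=False, num_classes=1000, in_channels=3)

# Width-0.75 variant intended for 160x160 inputs, with a 10-class head.
net_small = mobilenet_v2_075_160(pretrained=False, num_classes=10)

# MindSpore cells are called directly on NCHW float32 tensors.
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)                    # expected shape: (1, 1000)

x_small = ms.Tensor(np.random.randn(1, 3, 160, 160).astype(np.float32))
logits_small = net_small(x_small)  # expected shape: (1, 10)
```

Because every factory is decorated with @register_model, the same variants should also be reachable by their registered names through MindCV's model registry (for example, mindcv.create_model("mobilenet_v2_100")); treat that as an assumption if your MindCV version differs.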

mobilenetv3

+ + +
+ + + +

+ mindcv.models.mobilenetv3.MobileNetV3 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

MobileNetV3 model class, based on +"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>_

PARAMETER       TYPE    DEFAULT   DESCRIPTION
arch            str     required  size of the architecture, either 'small' or 'large'
alpha           float   1.0       scale factor of model width
round_nearest   int     8         divisor of the make-divisible function
in_channels     int     3         number of input channels
num_classes     int     1000      number of classification classes
+ Source code in mindcv/models/mobilenetv3.py +
(lines 100-242)
class MobileNetV3(nn.Cell):
+    r"""MobileNetV3 model class, based on
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_
+
+    Args:
+        arch: size of the architecture. 'small' or 'large'.
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number of the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(16 * alpha, round_nearest)
+        # Setting of bottleneck blocks. ex: [k, e, c, se, nl, s]
+        # k: kernel size of depth-wise conv
+        # e: expansion size
+        # c: number of output channel
+        # se: whether there is a Squeeze-And-Excite in that block
+        # nl: type of non-linearity used
+        # s: stride of depth-wise conv
+        if arch == "large":
+            bottleneck_setting = [
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hswish", 2],
+                [3, 200, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 480, 112, True, "hswish", 1],
+                [3, 672, 112, True, "hswish", 1],
+                [5, 672, 160, True, "hswish", 2],
+                [5, 960, 160, True, "hswish", 1],
+                [5, 960, 160, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1280, round_nearest)
+        elif arch == "small":
+            bottleneck_setting = [
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hswish", 2],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 120, 48, True, "hswish", 1],
+                [5, 144, 48, True, "hswish", 1],
+                [5, 288, 96, True, "hswish", 2],
+                [5, 576, 96, True, "hswish", 1],
+                [5, 576, 96, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1024, round_nearest)
+        else:
+            raise ValueError(f"Unsupported model type {arch}")
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.HSwish(),
+        ]
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=input_channels, reduction=total_reduction, name=f'features.{len(features) - 1}')]
+
+        # Building bottleneck blocks.
+        for k, e, c, se, nl, s in bottleneck_setting:
+            exp_channels = make_divisible(alpha * e, round_nearest)
+            output_channels = make_divisible(alpha * c, round_nearest)
+            features.append(Bottleneck(input_channels, exp_channels, output_channels,
+                                       kernel_size=k, stride=s, activation=nl, use_se=se))
+            input_channels = output_channels
+
+            total_reduction *= s
+            self.feature_info.append(dict(chs=input_channels, reduction=total_reduction,
+                                          name=f'features.{len(features) - 1}'))
+
+        # Building last point-wise conv layers.
+        output_channels = input_channels * 6
+        features.extend([
+            nn.Conv2d(input_channels, output_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.HSwish(),
+        ])
+
+        self.feature_info.append(dict(chs=output_channels, reduction=total_reduction,
+                                      name=f'features.{len(features) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(output_channels, last_channels),
+            nn.HSwish(),
+            Dropout(p=0.2),
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
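Since arch, alpha, in_channels, and num_classes are plain constructor arguments, the class documented above can also be instantiated directly when a configuration outside the registered variants below is needed. A minimal sketch, assuming MindSpore and MindCV are installed:

```python
import numpy as np
import mindspore as ms

from mindcv.models.mobilenetv3 import MobileNetV3

# "small" backbone at width 0.75 with a 37-class head.
# arch must be "small" or "large"; any other value raises ValueError.
net = MobileNetV3(arch="small", alpha=0.75, in_channels=3, num_classes=37)

x = ms.Tensor(np.random.randn(2, 3, 224, 224).astype(np.float32))

# construct() chains these two steps; they can also be called separately,
# e.g. to reuse the backbone features without the classifier.
features = net.forward_features(x)   # final feature map before global pooling
logits = net.forward_head(features)  # expected shape: (2, 37)
```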

+mindcv.models.mobilenetv3.mobilenet_v3_large_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get large MobileNetV3 model with width scaled by 0.75. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py +
(lines 279-286)
@register_model
+def mobilenet_v3_large_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_075"]
+    model_args = dict(arch="large", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_large_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get large MobileNetV3 model without width scaling. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py +
(lines 259-266)
@register_model
+def mobilenet_v3_large_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_100"]
+    model_args = dict(arch="large", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_small_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get small MobileNetV3 model with width scaled by 0.75. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py +
(lines 269-276)
@register_model
+def mobilenet_v3_small_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_075"]
+    model_args = dict(arch="small", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_small_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get small MobileNetV3 model without width scaling. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py +
(lines 249-256)
@register_model
+def mobilenet_v3_small_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_100"]
+    model_args = dict(arch="small", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
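All four factories above build the model through the private _create_mobilenet_v3 helper, passing the registered default_cfg alongside the architecture arguments. Since each is decorated with @register_model, the usual way to reach them is by name through MindCV's model registry; a sketch, assuming the top-level mindcv.create_model factory behaves as in current MindCV releases:

```python
import mindcv

# Create a registered variant by name; pretrained=True would download the
# checkpoint referenced by its default_cfg (requires network access).
net = mindcv.create_model("mobilenet_v3_small_100", num_classes=100, pretrained=False)

# Extra keyword arguments are forwarded to the factory and, from there,
# to the MobileNetV3 constructor (e.g. in_channels for non-RGB inputs).
net_gray = mindcv.create_model("mobilenet_v3_large_075", in_channels=1, pretrained=False)
```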

mobilevit

+ + + +
+ + + +

+mindcv.models.mobilevit.mobilevit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py +
(lines 840-847)
@register_model
+def mobilevit_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilevit.mobilevit_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py +
(lines 830-837)
@register_model
+def mobilevit_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("x_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_x_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilevit.mobilevit_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py +
(lines 820-827)
@register_model
+def mobilevit_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("xx_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_xx_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
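The three MobileViT factories differ only in the configuration string passed to get_config ("xx_small", "x_small", "small"). Note that in these wrappers in_channels is only forwarded to load_pretrained, not to the MobileViT constructor itself. A minimal usage sketch; the 256x256 input size is an assumption based on the standard MobileViT training resolution:

```python
import numpy as np
import mindspore as ms

from mindcv.models.mobilevit import mobilevit_xx_small

# Smallest MobileViT configuration with a 10-class head, randomly initialized.
net = mobilevit_xx_small(pretrained=False, num_classes=10)

# MobileViT is normally trained at 256x256 inputs (assumed here for MindCV as well).
x = ms.Tensor(np.random.randn(1, 3, 256, 256).astype(np.float32))
logits = net(x)   # expected shape: (1, 10)
```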

nasnet

+ + +
+ + + +

+ mindcv.models.nasnet.NASNetAMobile + + +

+ + +
+

+ Bases: nn.Cell

+ + +

NasNet model class, based on +"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>_

PARAMETER            TYPE   DEFAULT   DESCRIPTION
num_classes          int    1000      number of classification classes
stem_filters         int    32        number of stem filters
penultimate_filters  int    1056      number of penultimate filters
filters_multiplier   int    2         size of filters multiplier
+ Source code in mindcv/models/nasnet.py +
(lines 681-871)
class NASNetAMobile(nn.Cell):
+    r"""NasNet model class, based on
+    `"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>`_
+    Args:
+        num_classes: number of classification classes.
+        stem_filters: number of stem filters. Default: 32.
+        penultimate_filters: number of penultimate filters. Default: 1056.
+        filters_multiplier: size of filters multiplier. Default: 2.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        stem_filters: int = 32,
+        penultimate_filters: int = 1056,
+        filters_multiplier: int = 2,
+    ) -> None:
+        super().__init__()
+        self.stem_filters = stem_filters
+        self.penultimate_filters = penultimate_filters
+        self.filters_multiplier = filters_multiplier
+
+        filters = self.penultimate_filters // 24
+        # 24 is default value for the architecture
+
+        self.conv0 = nn.SequentialCell([
+            nn.Conv2d(in_channels=in_channels, out_channels=self.stem_filters, kernel_size=3, stride=2, pad_mode="pad",
+                      padding=0,
+                      has_bias=False),
+            nn.BatchNorm2d(num_features=self.stem_filters, eps=0.001, momentum=0.9, affine=True)
+        ])
+
+        self.cell_stem_0 = CellStem0(
+            self.stem_filters, num_filters=filters // (filters_multiplier ** 2)
+        )
+        self.cell_stem_1 = CellStem1(
+            self.stem_filters, num_filters=filters // filters_multiplier
+        )
+
+        self.cell_0 = FirstCell(
+            in_channels_left=filters,
+            out_channels_left=filters // 2,  # 1, 0.5
+            in_channels_right=2 * filters,
+            out_channels_right=filters,
+        )  # 2, 1
+        self.cell_1 = NormalCell(
+            in_channels_left=2 * filters,
+            out_channels_left=filters,  # 2, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_2 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_3 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+
+        self.reduction_cell_0 = ReductionCell0(
+            in_channels_left=6 * filters,
+            out_channels_left=2 * filters,  # 6, 2
+            in_channels_right=6 * filters,
+            out_channels_right=2 * filters,
+        )  # 6, 2
+
+        self.cell_6 = FirstCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=8 * filters,
+            out_channels_right=2 * filters,
+        )  # 8, 2
+        self.cell_7 = NormalCell(
+            in_channels_left=8 * filters,
+            out_channels_left=2 * filters,  # 8, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_8 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_9 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+
+        self.reduction_cell_1 = ReductionCell1(
+            in_channels_left=12 * filters,
+            out_channels_left=4 * filters,  # 12, 4
+            in_channels_right=12 * filters,
+            out_channels_right=4 * filters,
+        )  # 12, 4
+
+        self.cell_12 = FirstCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=16 * filters,
+            out_channels_right=4 * filters,
+        )  # 16, 4
+        self.cell_13 = NormalCell(
+            in_channels_left=16 * filters,
+            out_channels_left=4 * filters,  # 16, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_14 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_15 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+
+        self.relu = nn.ReLU()
+        self.dropout = Dropout(p=0.5)
+        self.classifier = nn.Dense(in_channels=24 * filters, out_channels=num_classes)
+        self.pool = GlobalAvgPooling()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        self.init_parameters_data()
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(init.initializer(init.Normal(math.sqrt(2. / n), 0),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x_conv0 = self.conv0(x)
+        x_stem_0 = self.cell_stem_0(x_conv0)
+        x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+        x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+        x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+        x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+        x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+        x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+        x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+        x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+        x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+        x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+        x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+        x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+        x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+        x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+        x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+        x_cell_15 = self.relu(x_cell_15)
+        return x_cell_15
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)  # global average pool
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.nasnet.NASNetAMobile.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/nasnet.py +
(lines 834-860)
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x_conv0 = self.conv0(x)
+    x_stem_0 = self.cell_stem_0(x_conv0)
+    x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+    x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+    x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+    x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+    x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+    x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+    x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+    x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+    x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+    x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+    x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+    x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+    x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+    x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+    x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+    x_cell_15 = self.relu(x_cell_15)
+    return x_cell_15
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.nasnet.nasnet_a_4x1056(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get NasNet model. +Refer to the base class models.NASNetAMobile for more details.

+ +
+ Source code in mindcv/models/nasnet.py +
(lines 874-882)
@register_model
+def nasnet_a_4x1056(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> NASNetAMobile:
+    """Get NasNet model.
+    Refer to the base class `models.NASNetAMobile` for more details."""
+    default_cfg = default_cfgs["nasnet_a_4x1056"]
+    model = NASNetAMobile(in_channels=in_channels, num_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
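nasnet_a_4x1056 is the only registered NASNet variant here: a NASNet-A Mobile with the default penultimate_filters=1056. Its forward_features method (documented above) returns the ReLU-activated output of the last normal cell, which makes it straightforward to reuse the network as a feature extractor. A minimal sketch, assuming MindSpore and MindCV are installed:

```python
import numpy as np
import mindspore as ms

from mindcv.models.nasnet import nasnet_a_4x1056

net = nasnet_a_4x1056(pretrained=False, num_classes=1000)

x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))

# Feature maps from the last normal cell, before global pooling and dropout.
feats = net.forward_features(x)

# Full classification head: global average pool -> dropout(p=0.5) -> dense layer.
logits = net.forward_head(feats)   # expected shape: (1, 1000)
```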

pit

+ + + +
+ + + +

+mindcv.models.pit.pit_b(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-B model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py +
(lines 475-496)
@register_model
+def pit_b(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-B model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_b"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=14,
+        stride=7,
+        base_dims=[64, 64, 64],
+        depth=[3, 6, 4],
+        heads=[4, 8, 16],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-S model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py +
(lines 451-472)
@register_model
+def pit_s(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-S model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_s"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[3, 6, 12],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_ti(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-Ti model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py +
(lines 403-424)
@register_model
+def pit_ti(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-Ti model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_ti"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[32, 32, 32],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_xs(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-XS model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py +
(lines 427-448)
@register_model
+def pit_xs(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-XS model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_xs"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
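The four PiT factories share one template and differ only in patch size, stride, base dims, depth, and head counts (for example, pit_ti uses base_dims=[32, 32, 32] with heads=[2, 4, 8], while pit_b uses a 14x14 patch with stride 7). A minimal usage sketch, assuming MindSpore and MindCV are installed:

```python
import numpy as np
import mindspore as ms

from mindcv.models.pit import pit_ti

# PiT-Ti at the 224x224 resolution hard-coded in the factory.
net = pit_ti(pretrained=False, num_classes=1000, in_channels=3)

x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)   # expected shape: (1, 1000)
```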

poolformer

+ + +
+ + + +

+ mindcv.models.poolformer.PoolFormer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

PoolFormer model class, based on +"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>_

PARAMETER               DEFAULT                    DESCRIPTION
layers                  required                   number of blocks for each of the 4 stages
embed_dims              (64, 128, 320, 512)        embedding dims for the 4 stages
mlp_ratios              (4, 4, 4, 4)               MLP ratios for the 4 stages
downsamples             (True, True, True, True)   flags to apply downsampling between stages or not
pool_size               3                          pooling size for the 4 stages
in_chans                3                          number of input channels
num_classes             1000                       number of classes for image classification
global_pool             'avg'                      type of global pooling layer
norm_layer              nn.GroupNorm               type of normalization layer
act_layer               nn.GELU                    type of activation layer
in_patch_size           7                          patch size of the input patch embedding
in_stride               4                          stride of the input patch embedding
in_pad                  2                          padding of the input patch embedding
down_patch_size         3                          patch size of the downsampling patch embeddings
down_stride             2                          stride of the downsampling patch embeddings
down_pad                1                          padding of the downsampling patch embeddings
drop_rate               0.0                        dropout rate of the layer before the main classifier
drop_path_rate          0.0                        stochastic depth rate
layer_scale_init_value  1e-5                       initial value for LayerScale
fork_feat               False                      whether to output the features of the 4 stages for dense prediction
+ Source code in mindcv/models/poolformer.py +
(lines 204-321)
class PoolFormer(nn.Cell):
+    r"""PoolFormer model class, based on
+    `"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>`_
+
+    Args:
+        layers: number of blocks for the 4 stages
+        embed_dims: the embedding dims for the 4 stages. Default: (64, 128, 320, 512)
+        mlp_ratios: mlp ratios for the 4 stages. Default: (4, 4, 4, 4)
+        downsamples: flags to apply downsampling or not. Default: (True, True, True, True)
+        pool_size: the pooling size for the 4 stages. Default: 3
+        in_chans: number of input channels. Default: 3
+        num_classes: number of classes for the image classification. Default: 1000
+        global_pool: define the types of pooling layer. Default: avg
+        norm_layer: define the types of normalization. Default: nn.GroupNorm
+        act_layer: define the types of activation. Default: nn.GELU
+        in_patch_size: specify the patch embedding for the input image. Default: 7
+        in_stride: specify the stride for the input image. Default: 4.
+        in_pad: specify the pad for the input image. Default: 2.
+        down_patch_size: specify the downsample. Default: 3.
+        down_stride: specify the downsample (patch embed.). Default: 2.
+        down_pad: specify the downsample (patch embed.). Default: 1.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: LayerScale. Default: 1e-5.
+        fork_feat: whether output features of the 4 stages, for dense prediction. Default: False.
+    """
+
+    def __init__(
+        self,
+        layers,
+        embed_dims=(64, 128, 320, 512),
+        mlp_ratios=(4, 4, 4, 4),
+        downsamples=(True, True, True, True),
+        pool_size=3,
+        in_chans=3,
+        num_classes=1000,
+        global_pool="avg",
+        norm_layer=nn.GroupNorm,
+        act_layer=nn.GELU,
+        in_patch_size=7,
+        in_stride=4,
+        in_pad=2,
+        down_patch_size=3,
+        down_stride=2,
+        down_pad=1,
+        drop_rate=0.0,
+        drop_path_rate=0.0,
+        layer_scale_init_value=1e-5,
+        fork_feat=False,
+    ):
+        super().__init__()
+
+        if not fork_feat:
+            self.num_classes = num_classes
+        self.fork_feat = fork_feat
+
+        self.global_pool = global_pool
+        self.num_features = embed_dims[-1]
+        self.grad_checkpointing = False
+
+        self.patch_embed = PatchEmbed(
+            patch_size=in_patch_size, stride=in_stride, padding=in_pad,
+            in_chs=in_chans, embed_dim=embed_dims[0])
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            network.append(basic_blocks(
+                embed_dims[i], i, layers,
+                pool_size=pool_size, mlp_ratio=mlp_ratios[i],
+                act_layer=act_layer, norm_layer=norm_layer,
+                drop_rate=drop_rate, drop_path_rate=drop_path_rate,
+                layer_scale_init_value=layer_scale_init_value)
+            )
+            if i < len(layers) - 1 and (downsamples[i] or embed_dims[i] != embed_dims[i + 1]):
+                # downsampling between stages
+                network.append(PatchEmbed(
+                    in_chs=embed_dims[i], embed_dim=embed_dims[i + 1],
+                    patch_size=down_patch_size, stride=down_stride, padding=down_pad)
+                )
+
+        self.network = nn.SequentialCell(*network)
+        self.norm = norm_layer(1, embed_dims[-1])
+        self.head = nn.Dense(embed_dims[-1], num_classes, has_bias=True) if num_classes > 0 else Identity()
+        # self._initialize_weights()
+        self.cls_init_weights()
+
+    def cls_init_weights(self):
+        """Initialize weights for cells."""
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+                if m.bias is not None:
+                    m.bias.set_data(
+                        init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))
+
+    def reset_classifier(self, num_classes, global_pool=None):
+        self.num_classes = num_classes
+        if global_pool is not None:
+            self.global_pool = global_pool
+        self.head = nn.Dense(self.num_features, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.network(x)
+        if self.fork_feat:
+            # output features of the four stages for dense prediction
+            return x
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x.mean([-2, -1]))
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.poolformer.PoolFormer.cls_init_weights() + +

+ + +
+ +

Initialize weights for cells.

+ +
+ Source code in mindcv/models/poolformer.py +
(lines 291-299)
def cls_init_weights(self):
+    """Initialize weights for cells."""
+    for name, m in self.cells_and_names():
+        if isinstance(m, nn.Dense):
+            m.weight.set_data(
+                init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+            if m.bias is not None:
+                m.bias.set_data(
+                    init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_m36(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get poolformer_m36 model. +Refer to the base class models.PoolFormer for more details.

+ +
+ Source code in mindcv/models/poolformer.py +
(lines 359-376)
@register_model
+def poolformer_m36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m36"]
+    layers = (6, 6, 18, 6)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.poolformer.poolformer_m48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_m48 model.
+Refer to the base class `models.PoolFormer` for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_m48(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m48 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m48"]
+    layers = (8, 8, 24, 8)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.poolformer.poolformer_s12(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s12 model.
+Refer to the base class `models.PoolFormer` for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s12(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s12 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s12"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(2, 2, 6, 2), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.poolformer.poolformer_s24(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s24 model.
+Refer to the base class `models.PoolFormer` for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s24(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s24 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s24"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(4, 4, 12, 4), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.poolformer.poolformer_s36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s36 model.
+Refer to the base class `models.PoolFormer` for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s36"]
+    model = PoolFormer(
+        in_chans=in_channels, num_classes=num_classes, layers=(6, 6, 18, 6), layer_scale_init_value=1e-6, **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
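+
+Example: a minimal usage sketch (not part of the generated reference) showing a forward pass through one of the
+registered factories above. It assumes MindSpore is installed and that the factory is re-exported from
+`mindcv.models`; otherwise import it from `mindcv.models.poolformer`.
+
+    import numpy as np
+    import mindspore as ms
+    from mindcv.models import poolformer_s12
+
+    # build the smallest PoolFormer variant without pretrained weights
+    model = poolformer_s12(pretrained=False, num_classes=1000)
+    model.set_train(False)
+
+    # dummy batch with one 224x224 RGB image
+    x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+    logits = model(x)
+    print(logits.shape)  # expected: (1, 1000)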

pvt

+ mindcv.models.pvt.PyramidVisionTransformer
+
+ Bases: nn.Cell
+
+Pyramid Vision Transformer model class, based on
+"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>
+
+PARAMETER               DESCRIPTION
+img_size (int)          size of an input image. Default: 224.
+patch_size (int)        size of a single image patch. Default: 4.
+in_chans (int)          number of channels of the input. Default: 3.
+num_classes (int)       number of classification classes. Default: 1000.
+embed_dims (list)       hidden dim of each PatchEmbed. Default: [64, 128, 320, 512].
+num_heads (list)        number of attention heads in each stage. Default: [1, 2, 5, 8].
+mlp_ratios (list)       ratios of MLP hidden dims in each stage. Default: [8, 8, 4, 4].
+qkv_bias (bool)         use bias in attention. Default: True.
+qk_scale (float)        scale multiplied by qk in attention if not None; otherwise head_dim ** -0.5 is used. Default: None.
+drop_rate (float)       the drop rate for each block. Default: 0.0.
+attn_drop_rate (float)  the drop rate for attention. Default: 0.0.
+drop_path_rate (float)  the drop rate for drop path. Default: 0.0.
+norm_layer (nn.Cell)    norm layer used in blocks. Default: nn.LayerNorm.
+depths (list)           number of Blocks in each stage. Default: [2, 2, 2, 2].
+sr_ratios (list)        stride and kernel size of each attention. Default: [8, 4, 2, 1].
+num_stages (int)        number of stages. Default: 4.
+
+ Source code in mindcv/models/pvt.py
class PyramidVisionTransformer(nn.Cell):
+    r"""Pyramid Vision Transformer model class, based on
+    `"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>`_  # noqa: E501
+
+    Args:
+        img_size(int) : size of a input image.
+        patch_size (int) : size of a single image patch.
+        in_chans (int) : number the channels of the input. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dims (list) : how many hidden dim in each PatchEmbed.
+        num_heads (list) : number of attention head in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias(bool) : use bias in attention.
+        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.
+        drop_rate(float) : The drop rate for each block. Default: 0.0.
+        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.
+        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.
+        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list) : number of Blocks.
+        sr_ratios(list) : stride and kernel size of each attention.
+        num_stages(int) : number of stage. Default: 4.
+    """
+
+    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 320, 512],
+                 num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True, qk_scale=None, drop_rate=0.0,
+                 attn_drop_rate=0.0, drop_path_rate=0.0, norm_layer=nn.LayerNorm,
+                 depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], num_stages=4):
+        super(PyramidVisionTransformer, self).__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        b_list = []
+        self.pos_embed = []
+        self.pos_drop = Dropout(p=drop_rate)
+        for i in range(num_stages):
+            block = nn.CellList(
+                [Block(dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                       qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j],
+                       norm_layer=norm_layer, sr_ratio=sr_ratios[i])
+                 for j in range(depths[i])
+                 ])
+
+            b_list.append(block)
+            cur += depths[0]
+
+        self.patch_embed1 = PatchEmbed(img_size=img_size,
+                                       patch_size=patch_size,
+                                       in_chans=in_chans,
+                                       embed_dim=embed_dims[0])
+        num_patches = self.patch_embed1.num_patches
+        self.pos_embed1 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[0]), mindspore.float16))
+        self.pos_drop1 = Dropout(p=drop_rate)
+
+        self.patch_embed2 = PatchEmbed(img_size=img_size // (2 ** (1 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[1 - 1],
+                                       embed_dim=embed_dims[1])
+        num_patches = self.patch_embed2.num_patches
+        self.pos_embed2 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[1]), mindspore.float16))
+        self.pos_drop2 = Dropout(p=drop_rate)
+
+        self.patch_embed3 = PatchEmbed(img_size=img_size // (2 ** (2 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[2 - 1],
+                                       embed_dim=embed_dims[2])
+        num_patches = self.patch_embed3.num_patches
+        self.pos_embed3 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[2]), mindspore.float16))
+        self.pos_drop3 = Dropout(p=drop_rate)
+
+        self.patch_embed4 = PatchEmbed(img_size // (2 ** (3 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[3 - 1],
+                                       embed_dim=embed_dims[3])
+        num_patches = self.patch_embed4.num_patches + 1
+        self.pos_embed4 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[3]), mindspore.float16))
+        self.pos_drop4 = Dropout(p=drop_rate)
+        self.Blocks = nn.CellList(b_list)
+
+        self.norm = norm_layer([embed_dims[3]])
+
+        # cls_token
+        self.cls_token = mindspore.Parameter(ops.zeros((1, 1, embed_dims[3]), mindspore.float32))
+
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self.reshape = ops.reshape
+        self.transpose = ops.transpose
+        self.tile = ops.Tile()
+        self.Concat = ops.Concat(axis=1)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def _get_pos_embed(self, pos_embed, ph, pw, H, W):
+        if H * W == self.patch_embed1.num_patches:
+            return pos_embed
+        else:
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, ph, pw, -1)), (0, 3, 1, 2))
+            resize_bilinear = ops.ResizeBilinear((H, W))
+            pos_embed = resize_bilinear(pos_embed)
+
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, -1, H * W)), (0, 2, 1))
+
+            return pos_embed
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        x, (H, W) = self.patch_embed1(x)
+        pos_embed = self.pos_embed1
+        x = self.pos_drop1(x + pos_embed)
+        for blk in self.Blocks[0]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed2(x)
+        ph, pw = self.patch_embed2.H, self.patch_embed2.W
+        pos_embed = self._get_pos_embed(self.pos_embed2, ph, pw, H, W)
+        x = self.pos_drop2(x + pos_embed)
+        for blk in self.Blocks[1]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed3(x)
+        ph, pw = self.patch_embed3.H, self.patch_embed3.W
+        pos_embed = self._get_pos_embed(self.pos_embed3, ph, pw, H, W)
+        x = self.pos_drop3(x + pos_embed)
+        for blk in self.Blocks[2]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed4(x)
+        cls_tokens = self.tile(self.cls_token, (B, 1, 1))
+
+        x = self.Concat((cls_tokens, x))
+        ph, pw = self.patch_embed4.H, self.patch_embed4.W
+        pos_embed_ = self._get_pos_embed(self.pos_embed4[:, 1:], ph, pw, H, W)
+        pos_embed = self.Concat((self.pos_embed4[:, 0:1], pos_embed_))
+        x = self.pos_drop4(x + pos_embed)
+        for blk in self.Blocks[3]:
+            x = blk(x, H, W)
+
+        x = self.norm(x)
+
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x
+
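+
+Example: a minimal sketch (an assumption, not part of the reference) of constructing `PyramidVisionTransformer`
+directly with the hyperparameters documented above; the configuration mirrors `pvt_tiny` but with a 10-class head.
+
+    from functools import partial
+
+    import mindspore.nn as nn
+    from mindcv.models.pvt import PyramidVisionTransformer
+
+    net = PyramidVisionTransformer(
+        img_size=224, patch_size=4, in_chans=3, num_classes=10,
+        embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
+        depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+    )
+    print(net.get_classifier())  # nn.Dense head with 10 output classes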
+
+mindcv.models.pvt.pvt_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT large model.
+Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_large(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT large model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_large']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvt.pvt_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT medium model.
+Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_medium(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT medium model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_medium']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvt.pvt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT small model.
+Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_small(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT small model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_small']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvt.pvt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT tiny model.
+Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_tiny(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT tiny model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_tiny']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
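+
+Example: a minimal sketch (an assumption, not part of the reference) of building one of the PVT factories above
+and counting its parameters; it assumes the factory is re-exported from `mindcv.models`.
+
+    from mindcv.models import pvt_tiny
+
+    net = pvt_tiny(pretrained=False, num_classes=1000)
+    n_params = sum(p.size for p in net.get_parameters())
+    print(f"pvt_tiny parameters: {n_params / 1e6:.1f} M")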

pvtv2

+ mindcv.models.pvtv2.PyramidVisionTransformerV2
+
+ Bases: nn.Cell
+
+Pyramid Vision Transformer V2 model class, based on
+"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>
+
+PARAMETER               DESCRIPTION
+img_size (int)          size of an input image. Default: 224.
+patch_size (int)        size of a single image patch. Default: 16.
+in_chans (int)          number of channels of the input. Default: 3.
+num_classes (int)       number of classification classes. Default: 1000.
+embed_dims (list)       hidden dim of each PatchEmbed. Default: [64, 128, 256, 512].
+num_heads (list)        number of attention heads in each stage. Default: [1, 2, 4, 8].
+mlp_ratios (list)       ratios of MLP hidden dims in each stage. Default: [4, 4, 4, 4].
+qkv_bias (bool)         use bias in attention. Default: False.
+qk_scale (float)        scale multiplied by qk in attention if not None; otherwise head_dim ** -0.5 is used. Default: None.
+drop_rate (float)       the drop rate for each block. Default: 0.0.
+attn_drop_rate (float)  the drop rate for attention. Default: 0.0.
+drop_path_rate (float)  the drop rate for drop path. Default: 0.0.
+norm_layer (nn.Cell)    norm layer used in blocks. Default: nn.LayerNorm.
+depths (list)           number of Blocks in each stage. Default: [3, 4, 6, 3].
+sr_ratios (list)        stride and kernel size of each attention. Default: [8, 4, 2, 1].
+num_stages (int)        number of stages. Default: 4.
+linear (bool)           use linear SRA. Default: False.
+
+ Source code in mindcv/models/pvtv2.py
class PyramidVisionTransformerV2(nn.Cell):
+    r"""Pyramid Vision Transformer V2 model class, based on
+    `"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>`_
+
+    Args:
+        img_size(int) : size of a input image.
+        patch_size (int) : size of a single image patch.
+        in_chans (int) : number the channels of the input. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dims (list) : how many hidden dim in each PatchEmbed.
+        num_heads (list) : number of attention head in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias(bool) : use bias in attention.
+        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.
+        drop_rate(float) : The drop rate for each block. Default: 0.0.
+        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.
+        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.
+        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list) : number of Blocks.
+        sr_ratios(list) : stride and kernel size of each attention.
+        num_stages(int) : number of stage. Default: 4.
+        linear(bool) :  use linear SRA.
+    """
+
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4, linear=False):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+
+        patch_embed_list = []
+        block_list = []
+        norm_list = []
+
+        for i in range(num_stages):
+            patch_embed = OverlapPatchEmbed(img_size=img_size if i == 0 else img_size // (2 ** (i + 1)),
+                                            patch_size=7 if i == 0 else 3,
+                                            stride=4 if i == 0 else 2,
+                                            in_chans=in_chans if i == 0 else embed_dims[i - 1],
+                                            embed_dim=embed_dims[i])
+
+            block = nn.CellList([Block(
+                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,
+                sr_ratio=sr_ratios[i], linear=linear, block_id=j)
+                for j in range(depths[i])])
+
+            norm = norm_layer([embed_dims[i]])
+
+            cur += depths[i]
+
+            patch_embed_list.append(patch_embed)
+            block_list.append(block)
+            norm_list.append(norm)
+        self.patch_embed_list = nn.CellList(patch_embed_list)
+        self.block_list = nn.CellList(block_list)
+        self.norm_list = nn.CellList(norm_list)
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def freeze_patch_emb(self):
+        self.patch_embed_list[0].requires_grad = False
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        for i in range(self.num_stages):
+            patch_embed = self.patch_embed_list[i]
+            block = self.block_list[i]
+            norm = self.norm_list[i]
+            x, H, W = patch_embed(x)
+            for blk in block:
+                x = blk(x, H, W)
+            x = norm(x)
+            if i != self.num_stages - 1:
+                x = ops.transpose(ops.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        return x.mean(axis=1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x
+
+mindcv.models.pvtv2.pvt_v2_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b0 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b0(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b0 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b0"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvtv2.pvt_v2_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b1 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b1(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b1 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b1"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvtv2.pvt_v2_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b2 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b2(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b2 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b2"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvtv2.pvt_v2_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b3 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b3(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b3 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b3"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvtv2.pvt_v2_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b4 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b4(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b4 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b4"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.pvtv2.pvt_v2_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b5 model.
+Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b5(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b5 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b5"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
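+
+Example: a minimal forward-pass sketch (an assumption, not part of the reference) for the smallest PVTv2 variant;
+it assumes MindSpore is installed and that `pvt_v2_b0` is re-exported from `mindcv.models`.
+
+    import numpy as np
+    import mindspore as ms
+    from mindcv.models import pvt_v2_b0
+
+    net = pvt_v2_b0(pretrained=False, num_classes=1000)
+    net.set_train(False)
+    x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+    print(net(x).shape)  # expected: (1, 1000)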

regnet

+mindcv.models.regnet.regnet_x_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_16gf"]
+    model = RegNet(55.59, 216, 2.1, 22, 128, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_1_6gf"]
+    model = RegNet(34.01, 80, 2.25, 18, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_32gf"]
+    model = RegNet(69.86, 320, 2.0, 23, 168, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_3_2gf"]
+    model = RegNet(26.31, 88, 2.25, 25, 48, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_400mf"]
+    model = RegNet(24.48, 24, 2.54, 22, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_4_0gf"]
+    model = RegNet(38.65, 96, 2.43, 23, 40, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_600mf"]
+    model = RegNet(36.97, 48, 2.24, 16, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_6_4gf"]
+    model = RegNet(60.83, 184, 2.07, 17, 56, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_800mf"]
+    model = RegNet(35.73, 56, 2.28, 16, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_x_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_8_0gf"]
+    model = RegNet(49.56, 80, 2.88, 23, 120, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_16gf"]
+    model = RegNet(106.23, 200, 2.48, 18, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_1_6gf"]
+    model = RegNet(20.71, 48, 2.65, 27, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_32gf"]
+    model = RegNet(115.89, 232, 2.53, 20, 232, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_3_2gf"]
+    model = RegNet(42.63, 80, 2.66, 21, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_400mf"]
+    model = RegNet(27.89, 48, 2.09, 16, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_4_0gf"]
+    model = RegNet(31.41, 96, 2.24, 22, 64, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_600mf"]
+    model = RegNet(32.54, 48, 2.32, 15, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_6_4gf"]
+    model = RegNet(33.22, 112, 2.27, 25, 72, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_800mf"]
+    model = RegNet(38.84, 56, 2.4, 14, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.regnet.regnet_y_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_8_0gf"]
+    model = RegNet(76.82, 192, 2.19, 17, 56, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
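+
+Example: a minimal sketch (an assumption, not part of the reference) showing that the X and Y RegNet factories
+above share the same interface; as the source above shows, the Y variants additionally pass se_r=0.25
+(squeeze-and-excitation) to RegNet. It assumes the factories are re-exported from `mindcv.models`.
+
+    import numpy as np
+    import mindspore as ms
+    from mindcv.models import regnet_x_800mf, regnet_y_800mf
+
+    x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+    for factory in (regnet_x_800mf, regnet_y_800mf):
+        net = factory(pretrained=False, num_classes=1000)
+        net.set_train(False)
+        print(net(x).shape)  # expected: (1, 1000) for both variants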

repmlp

+ mindcv.models.repmlp.RepMLPNet
+
+ Bases: nn.Cell
+
+RepMLPNet model class, based on
+"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>
+
+PARAMETER                DESCRIPTION
+in_channels              number of input channels. Default: 3.
+num_classes              number of classification classes (the constructor argument is named `num_class`). Default: 1000.
+patch_size               size of a single image patch. Default: (4, 4).
+num_blocks               number of blocks per stage. Default: (2, 2, 6, 2).
+channels                 number of in_channels (channels[stage_idx]) and out_channels (channels[stage_idx + 1]) per stage. Default: (192, 384, 768, 1536).
+hs                       height of the feature map per stage. Default: (64, 32, 16, 8).
+ws                       width of the feature map per stage. Default: (64, 32, 16, 8).
+sharesets_nums           number of share sets per stage. Default: (4, 8, 16, 32).
+reparam_conv_k           convolution kernel sizes in the local perceptron. Default: (3,).
+globalperceptron_reduce  reduction ratio of the intermediate convolution in the global perceptron (out_channels = in_channels / globalperceptron_reduce). Default: 4.
+use_checkpoint           whether to use checkpointing. Default: False.
+deploy                   whether to use bias. Default: False.
+
+ Source code in mindcv/models/repmlp.py
class RepMLPNet(nn.Cell):
+    r"""RepMLPNet model class, based on
+    `"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>`_
+
+    Args:
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        patch_size: size of a single image patch. Default: (4, 4)
+        num_blocks: number of blocks per stage. Default: (2,2,6,2)
+        channels: number of in_channels(channels[stage_idx]) and out_channels(channels[stage_idx + 1]) per stage.
+            Default: (192,384,768,1536)
+        hs: height of picture per stage. Default: (64,32,16,8)
+        ws: width of picture per stage. Default: (64,32,16,8)
+        sharesets_nums: number of share sets per stage. Default: (4,8,16,32)
+        reparam_conv_k: convolution kernel size in local Perceptron. Default: (3,)
+        globalperceptron_reduce: Intermediate convolution output size
+            (in_channal = inchannal, out_channel = in_channel/globalperceptron_reduce) in globalperceptron. Default: 4
+        use_checkpoint: whether to use checkpoint
+        deploy: whether to use bias
+    """
+
+    def __init__(self,
+                 in_channels=3, num_class=1000,
+                 patch_size=(4, 4),
+                 num_blocks=(2, 2, 6, 2), channels=(192, 384, 768, 1536),
+                 hs=(64, 32, 16, 8), ws=(64, 32, 16, 8),
+                 sharesets_nums=(4, 8, 16, 32),
+                 reparam_conv_k=(3,),
+                 globalperceptron_reduce=4, use_checkpoint=False,
+                 deploy=False):
+        super().__init__()
+        num_stages = len(num_blocks)
+        assert num_stages == len(channels)
+        assert num_stages == len(hs)
+        assert num_stages == len(ws)
+        assert num_stages == len(sharesets_nums)
+
+        self.conv_embedding = conv_bn_relu(in_channels, channels[0], kernel_size=patch_size, stride=patch_size,
+                                           padding=0, has_bias=False)
+        self.conv2d = nn.Conv2d(in_channels, channels[0], kernel_size=patch_size, stride=patch_size, padding=0)
+
+        stages = []
+        embeds = []
+        for stage_idx in range(num_stages):
+            stage_blocks = [RepMLPNetUnit(channels=channels[stage_idx], h=hs[stage_idx], w=ws[stage_idx],
+                                          reparam_conv_k=reparam_conv_k,
+                                          globalperceptron_reduce=globalperceptron_reduce, ffn_expand=4,
+                                          num_sharesets=sharesets_nums[stage_idx],
+                                          deploy=deploy) for _ in range(num_blocks[stage_idx])]
+            stages.append(nn.CellList(stage_blocks))
+            if stage_idx < num_stages - 1:
+                embeds.append(
+                    conv_bn_relu(in_channels=channels[stage_idx], out_channels=channels[stage_idx + 1], kernel_size=2,
+                                 stride=2, padding=0))
+        self.stages = nn.CellList(stages)
+        self.embeds = nn.CellList(embeds)
+        self.head_norm = nn.BatchNorm2d(channels[-1]).set_train()
+        self.head = nn.Dense(channels[-1], num_class)
+
+        self.use_checkpoint = use_checkpoint
+        self.shape = ops.Shape()
+        self.reshape = ops.Reshape()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                k = cell.group / (cell.in_channels * cell.kernel_size[0] * cell.kernel_size[1])
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                k = 1 / cell.in_channels
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_embedding(x)
+
+        for i, stage in enumerate(self.stages):
+            for block in stage:
+                x = block(x)
+
+            if i < len(self.stages) - 1:
+                embed = self.embeds[i]
+                x = embed(x)
+        x = self.head_norm(x)
+        shape = self.shape(x)
+        pool = nn.AvgPool2d(kernel_size=(shape[2], shape[3]))
+        x = pool(x)
+        return x.view(shape[0], -1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+
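A minimal construction sketch (not part of the generated docs; it assumes mindspore and mindcv are installed). With the defaults above, patch_size=(4, 4) and hs = ws = (64, 32, 16, 8) imply a 256x256 input so that the first stage sees a 64x64 feature map:

import numpy as np
import mindspore as ms
from mindcv.models.repmlp import RepMLPNet

net = RepMLPNet()  # default config: channels (192, 384, 768, 1536), blocks (2, 2, 6, 2)
x = ms.Tensor(np.random.randn(1, 3, 256, 256), ms.float32)
logits = net(x)  # shape (1, 1000)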

+mindcv.models.repmlp.repmlp_b224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b224 model. Refer to the base class models.RepMLPNet for more details.
Source code in mindcv/models/repmlp.py (lines 418-431):
@register_model
+def repmlp_b224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b224 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
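The function is registered via @register_model, so the same model can also be built by name; a short sketch, assuming the usual mindcv.create_model registry entry point:

import mindcv

net = mindcv.create_model("repmlp_b224", pretrained=False, num_classes=1000)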
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_b256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b256 model. Refer to the base class models.RepMLPNet for more details.
Source code in mindcv/models/repmlp.py (lines 434-447):
@register_model
+def repmlp_b256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_d256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_d256 model. Refer to the base class models.RepMLPNet for more details.
Source code in mindcv/models/repmlp.py (lines 450-463):
@register_model
+def repmlp_d256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_d256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_d256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(80, 160, 320, 640), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_l256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_l256 model. Refer to the base class models.RepMLPNet for more details.
Source code in mindcv/models/repmlp.py (lines 466-479):
@register_model
+def repmlp_l256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_l256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_l256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 256),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t224 model. Refer to the base class models.RepMLPNet for more details.

+ +
Source code in mindcv/models/repmlp.py (lines 386-399):
@register_model
+def repmlp_t224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t224 model. Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t256 model. Refer to the base class models.RepMLPNet for more details.
Source code in mindcv/models/repmlp.py (lines 402-415):
@register_model
+def repmlp_t256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

repvgg

+ + +
+ + + +

+ mindcv.models.repvgg.RepVGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

RepVGG model class, based on "RepVGG: Making VGG-style ConvNets Great Again" <https://arxiv.org/pdf/2101.03697>_

PARAMETER           DESCRIPTION
num_blocks          number of RepVGGBlocks in each of the four stages. TYPE: list
num_classes         number of classification classes. TYPE: int. DEFAULT: 1000
in_channels         number of input channels. TYPE: int. DEFAULT: 3
width_multiplier    per-stage channel width multipliers. TYPE: list. DEFAULT: None
override_group_map  mapping from block index to the number of groups used in that block's convolution. TYPE: dict. DEFAULT: None
deploy              use rbr_reparam block or not. TYPE: bool. DEFAULT: False
use_se              use se_block or not. TYPE: bool. DEFAULT: False
Source code in mindcv/models/repvgg.py (lines 201-291):
class RepVGG(nn.Cell):
+    r"""RepVGG model class, based on
+    `"RepVGGBlock: An all-MLP Architecture for Vision" <https://arxiv.org/pdf/2101.03697>`_
+
+    Args:
+        num_blocks (list) : number of RepVGGBlocks
+        num_classes (int) : number of classification classes. Default: 1000.
+        in_channels (in_channels) : number the channels of the input. Default: 3.
+        width_multiplier (list) : the numbers of MLP Architecture.
+        override_group_map (dict) : the numbers of MLP Architecture.
+        deploy (bool) : use rbr_reparam block or not. Default: False
+        use_se (bool) : use se_block or not. Default: False
+    """
+
+    def __init__(self, num_blocks, num_classes=1000, in_channels=3, width_multiplier=None, override_group_map=None,
+                 deploy=False, use_se=False):
+        super().__init__()
+
+        assert len(width_multiplier) == 4
+
+        self.deploy = deploy
+        self.override_group_map = override_group_map or {}
+        self.use_se = use_se
+
+        assert 0 not in self.override_group_map
+
+        self.in_planes = min(64, int(64 * width_multiplier[0]))
+
+        self.stage0 = RepVGGBlock(in_channels=in_channels, out_channels=self.in_planes, kernel_size=3, stride=2,
+                                  padding=1,
+                                  deploy=self.deploy, use_se=self.use_se)
+        self.feature_info = [dict(chs=self.in_planes, reduction=2, name="stage0")]
+        self.cur_layer_idx = 1
+        self.stage1 = self._make_stage(
+            int(64 * width_multiplier[0]), num_blocks[0], stride=2)
+        self.feature_info.append(dict(chs=int(64 * width_multiplier[0]), reduction=4, name="stage1"))
+        self.stage2 = self._make_stage(
+            int(128 * width_multiplier[1]), num_blocks[1], stride=2)
+        self.feature_info.append(dict(chs=int(128 * width_multiplier[1]), reduction=8, name="stage2"))
+        self.stage3 = self._make_stage(
+            int(256 * width_multiplier[2]), num_blocks[2], stride=2)
+        self.feature_info.append(dict(chs=int(256 * width_multiplier[2]), reduction=16, name="stage3"))
+        self.stage4 = self._make_stage(
+            int(512 * width_multiplier[3]), num_blocks[3], stride=2)
+        self.feature_info.append(dict(chs=int(512 * width_multiplier[3]), reduction=32, name="stage4"))
+        self.gap = GlobalAvgPooling()
+        self.linear = nn.Dense(int(512 * width_multiplier[3]), num_classes)
+        self._initialize_weights()
+
+    def _make_stage(self, planes, num_blocks, stride):
+        strides = [stride] + [1] * (num_blocks - 1)
+        blocks = []
+        for s in strides:
+            cur_group = self.override_group_map.get(self.cur_layer_idx, 1)
+            blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,
+                                      stride=s, padding=1, group=cur_group, deploy=self.deploy,
+                                      use_se=self.use_se))
+            self.in_planes = planes
+            self.cur_layer_idx += 1
+
+        return nn.SequentialCell(blocks)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def construct(self, x):
+        x = self.stage0(x)
+        x = self.stage1(x)
+        x = self.stage2(x)
+        x = self.stage3(x)
+        x = self.stage4(x)
+        x = self.gap(x)
+        x = self.linear(x)
+        return x
+
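A construction sketch for the class itself (the exact per-variant settings are given by the factory functions below); note that width_multiplier must contain exactly four entries, one per stage. It assumes mindspore and mindcv are installed:

import numpy as np
import mindspore as ms
from mindcv.models.repvgg import RepVGG

# Roughly the repvgg_a0 configuration, built directly from the class.
net = RepVGG(num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5])
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = net(x)  # shape (1, 1000)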
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 298-306):
@register_model
+def repvgg_a0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_a0"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[0.75, 0.75, 0.75, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
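As with the other registered models, the variant can be created by name; num_classes is forwarded to the constructor through the factory. A sketch assuming the usual mindcv.create_model entry point:

import mindcv

net = mindcv.create_model("repvgg_a0", pretrained=False, num_classes=10)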
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 309-317):
@register_model
+def repvgg_a1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a1"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 320-328):
@register_model
+def repvgg_a2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a2"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.5, 1.5, 1.5, 2.75], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 331-339):
@register_model
+def repvgg_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b0']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 342-350):
@register_model
+def repvgg_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b1']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 380-388):
@register_model
+def repvgg_b1g2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g2"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g2_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
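repvgg_b1g2 and repvgg_b1g4 differ from repvgg_b1 only in the module-level g2_map / g4_map dictionaries passed as override_group_map. The mechanism can be illustrated with a hypothetical map (not part of the source): each key is a block index (the class's cur_layer_idx counter) and each value is the group count for that block's 3x3 convolution.

from mindcv.models.repvgg import RepVGG

# Hypothetical grouping: grouped 3x3 convs in blocks 2 and 4 only.
custom_map = {2: 4, 4: 4}  # block index -> number of groups; 0 is not a valid key
net = RepVGG(num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5],
             override_group_map=custom_map)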
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 391-399):
@register_model
+def repvgg_b1g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 353-361):
@register_model
+def repvgg_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b2']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 402-410):
@register_model
+def repvgg_b2g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b2g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0]. Refer to the base class models.RepVGG for more details.
Source code in mindcv/models/repvgg.py (lines 364-372):
@register_model
+def repvgg_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b3']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[3.0, 3.0, 3.0, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +

res2net

+ + +
+ + + +

+ mindcv.models.res2net.Res2Net + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Res2Net model class, based on "Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>_

PARAMETER    DESCRIPTION
block        block of resnet. TYPE: Type[nn.Cell]
layer_nums   number of layers of each stage. TYPE: List[int]
version      variety of Res2Net, 'res2net' or 'res2net_v1b'. TYPE: str. DEFAULT: 'res2net'
num_classes  number of classification classes. TYPE: int. DEFAULT: 1000
in_channels  number of input channels. TYPE: int. DEFAULT: 3
groups       number of groups for group conv in blocks. TYPE: int. DEFAULT: 1
base_width   base width of per-group hidden channels in blocks. TYPE: int. DEFAULT: 26
scale        scale factor of Bottle2neck. DEFAULT: 4
norm         normalization layer in blocks. TYPE: Optional[nn.Cell]. DEFAULT: None
Source code in mindcv/models/res2net.py (lines 142-309):
class Res2Net(nn.Cell):
+    r"""Res2Net model class, based on
+    `"Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>`_
+
+    Args:
+        block: block of resnet.
+        layer_nums: number of layers of each stage.
+        version: variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 26.
+        scale: scale factor of Bottle2neck. Default: 4.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layer_nums: List[int],
+        version: str = "res2net",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 26,
+        scale=4,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        assert version in ["res2net", "res2net_v1b"]
+        self.version = version
+
+        if norm is None:
+            norm = nn.BatchNorm2d
+        self.norm = norm
+
+        self.num_classes = num_classes
+        self.input_channels = 64
+        self.groups = groups
+        self.base_width = base_width
+        self.scale = scale
+        if self.version == "res2net":
+            self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                                   stride=2, padding=3, pad_mode="pad")
+        elif self.version == "res2net_v1b":
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(in_channels, self.input_channels // 2, kernel_size=3,
+                          stride=2, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels // 2, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+            ])
+
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.max_pool = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2)
+        ])
+        self.layer1 = self._make_layer(block, 64, layer_nums[0])
+        self.layer2 = self._make_layer(block, 128, layer_nums[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layer_nums[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layer_nums[3], stride=2)
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[nn.Cell],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            if stride == 1 or self.version == "res2net":
+                down_sample = nn.SequentialCell([
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                    self.norm(channels * block.expansion)
+                ])
+            else:
+                down_sample = nn.SequentialCell([
+                    nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="same"),
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=1),
+                    self.norm(channels * block.expansion)
+                ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_width,
+                scale=self.scale,
+                stype="stage",
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    scale=self.scale,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
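A construction sketch, assuming Bottle2neck is importable from the same module (the factory functions below reference it); the settings mirror the 50-layer v1b configuration:

from mindcv.models.res2net import Res2Net, Bottle2neck

net = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net_v1b",
              num_classes=1000, base_width=26, scale=4)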
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net101(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers Res2Net model. Refer to the base class models.Res2Net for more details.
Source code in mindcv/models/res2net.py (lines 326-337):
@register_model
+def res2net101(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 101 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net101"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
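Note that the Res2Net factories default to num_classes=1001; pass num_classes explicitly if a standard 1000-way head is wanted. A sketch assuming the usual mindcv.create_model registry entry point:

import mindcv

net = mindcv.create_model("res2net101", pretrained=False, num_classes=1000)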
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net101_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/res2net.py (lines 366-375):
@register_model
+def res2net101_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net101_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net152(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 152 layers Res2Net model. Refer to the base class models.Res2Net for more details.
Source code in mindcv/models/res2net.py (lines 340-351):
@register_model
+def res2net152(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 152 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net152"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net152_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/res2net.py (lines 378-387):
@register_model
+def res2net152_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net152_v1b"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net50(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers Res2Net model. Refer to the base class models.Res2Net for more details.
Source code in mindcv/models/res2net.py (lines 312-323):
@register_model
+def res2net50(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 50 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net50"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net50_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/res2net.py (lines 354-363):
@register_model
+def res2net50_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net50_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

resnest

+ + +
+ + + +

+ mindcv.models.resnest.ResNeSt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ResNeSt model class, based on "ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>_

PARAMETER         DESCRIPTION
block             Class for the residual block. Option is Bottleneck. TYPE: Type[Bottleneck]
layers            Numbers of layers in each block. TYPE: List[int]
radix             Number of groups for Split-Attention conv. TYPE: int. DEFAULT: 1
group             Number of groups for the conv in each bottleneck block. TYPE: int. DEFAULT: 1
bottleneck_width  bottleneck channels factor. TYPE: int. DEFAULT: 64
num_classes       Number of classification classes. TYPE: int. DEFAULT: 1000
dilated           Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model, typically used in semantic segmentation. TYPE: bool. DEFAULT: False
dilation          Number of dilation in the conv. TYPE: int. DEFAULT: 1
deep_stem         three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2. TYPE: bool. DEFAULT: False
stem_width        number of channels in stem convolutions. TYPE: int. DEFAULT: 64
avg_down          use avg pooling for the projection skip connection between stages/downsample. TYPE: bool. DEFAULT: False
avd               use avg pooling inside the split-attention blocks. TYPE: bool. DEFAULT: False
avd_first         place that avg pooling before (True) or after (False) the split-attention conv. TYPE: bool. DEFAULT: False
drop_rate         Drop probability for the Dropout layer. TYPE: float. DEFAULT: 0.0
norm_layer        Normalization layer used in backbone network. TYPE: nn.Cell. DEFAULT: nn.BatchNorm2d
Source code in mindcv/models/resnest.py (lines 225-457):
class ResNeSt(nn.Cell):
+    r"""ResNeSt model class, based on
+    `"ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>`_
+
+    Args:
+        block: Class for the residual block. Option is Bottleneck.
+        layers: Numbers of layers in each block.
+        radix: Number of groups for Split-Attention conv. Default: 1.
+        group: Number of groups for the conv in each bottleneck block. Default: 1.
+        bottleneck_width: bottleneck channels factor. Default: 64.
+        num_classes: Number of classification classes. Default: 1000.
+        dilated: Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model,
+                 typically used in Semantic Segmentation. Default: False.
+        dilation: Number of dilation in the conv. Default: 1.
+        deep_stem: three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2.
+                   Default: False.
+        stem_width: number of channels in stem convolutions. Default: 64.
+        avg_down: use avg pooling for projection skip connection between stages/downsample.
+                  Default: False.
+        avd: use avg pooling before or after split-attention conv. Default: False.
+        avd_first: use avg pooling before or after split-attention conv. Default: False.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        norm_layer: Normalization layer used in backbone network. Default: nn.BatchNorm2d.
+    """
+
+    def __init__(
+        self,
+        block: Type[Bottleneck],
+        layers: List[int],
+        radix: int = 1,
+        group: int = 1,
+        bottleneck_width: int = 64,
+        num_classes: int = 1000,
+        dilated: bool = False,
+        dilation: int = 1,
+        deep_stem: bool = False,
+        stem_width: int = 64,
+        avg_down: bool = False,
+        avd: bool = False,
+        avd_first: bool = False,
+        drop_rate: float = 0.0,
+        norm_layer: nn.Cell = nn.BatchNorm2d,
+    ) -> None:
+        super(ResNeSt, self).__init__()
+        self.cardinality = group
+        self.bottleneck_width = bottleneck_width
+        # ResNet-D params
+        self.inplanes = stem_width * 2 if deep_stem else 64
+        self.avg_down = avg_down
+        # ResNeSt params
+        self.radix = radix
+        self.avd = avd
+        self.avd_first = avd_first
+
+        if deep_stem:
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(3, stem_width, kernel_size=3, stride=2, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+            ])
+        else:
+            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode="pad", padding=3,
+                                   has_bias=False)
+
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.inplanes, reduction=2, name="relu")]
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name='layer1'))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name='layer2'))
+
+        if dilated or dilation == 4:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=8, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=8, name='layer4'))
+        elif dilation == 2:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=16, name='layer4'))
+        else:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name='layer4'))
+
+        self.avgpool = GlobalAvgPooling()
+        self.drop = Dropout(p=drop_rate) if drop_rate > 0.0 else None
+        self.fc = nn.Dense(512 * block.expansion, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeUniform(mode="fan_in", nonlinearity="sigmoid"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Bottleneck],
+        planes: int,
+        blocks: int,
+        stride: int = 1,
+        dilation: int = 1,
+        norm_layer: Optional[nn.Cell] = None,
+        is_first: bool = True,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            down_layers = []
+            if self.avg_down:
+                if dilation == 1:
+                    down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="valid"))
+                else:
+                    down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, pad_mode="valid"))
+
+                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1,
+                                             stride=1, has_bias=False))
+            else:
+                down_layers.append(
+                    nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,
+                              has_bias=False))
+            down_layers.append(norm_layer(planes * block.expansion))
+            downsample = nn.SequentialCell(down_layers)
+
+        layers = []
+        if dilation == 1 or dilation == 2:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=1,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        elif dilation == 4:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=2,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        else:
+            raise ValueError(f"Unsupported model type {dilation}")
+
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=dilation,
+                    norm_layer=norm_layer,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        if self.drop:
+            x = self.drop(x)
+        x = self.fc(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
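A sketch of the stride-8 configuration enabled by the dilated argument, assuming Bottleneck is importable from the same module (the factories below reference it); the settings mirror resnest50 with dilated=True:

from mindcv.models.resnest import ResNeSt, Bottleneck

net = ResNeSt(Bottleneck, [3, 4, 6, 3], radix=2, group=1, bottleneck_width=64,
              deep_stem=True, stem_width=32, avg_down=True,
              avd=True, avd_first=False, dilated=True)  # layer3/layer4 use dilation 2/4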
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 494-501):
@register_model
+def resnest101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
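The ResNeSt factories differ only in depth and stem width; extra keyword arguments are forwarded to the constructor through **kwargs, e.g. drop_rate. A sketch assuming the usual mindcv.create_model entry point:

import mindcv

net = mindcv.create_model("resnest101", pretrained=False, drop_rate=0.2)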
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest14(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 464-471):
@register_model
+def resnest14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest14"]
+    model_args = dict(block=Bottleneck, layers=[1, 1, 1, 1], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest200(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 504-511):
@register_model
+def resnest200(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest200"]
+    model_args = dict(block=Bottleneck, layers=[3, 24, 36, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest26(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 474-481):
@register_model
+def resnest26(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest26"]
+    model_args = dict(block=Bottleneck, layers=[2, 2, 2, 2], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest269(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 514-521):
@register_model
+def resnest269(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest269"]
+    model_args = dict(block=Bottleneck, layers=[3, 30, 48, 8], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
Source code in mindcv/models/resnest.py (lines 484-491):
@register_model
+def resnest50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +

resnet

+ + +
+ + + +

+ mindcv.models.resnet.ResNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ResNet model class, based on "Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>_

PARAMETER    DESCRIPTION
block        block of resnet. TYPE: Type[Union[BasicBlock, Bottleneck]]
layers       number of layers of each stage. TYPE: List[int]
num_classes  number of classification classes. TYPE: int. DEFAULT: 1000
in_channels  number of input channels. TYPE: int. DEFAULT: 3
groups       number of groups for group conv in blocks. TYPE: int. DEFAULT: 1
base_width   base width of per-group hidden channels in blocks. TYPE: int. DEFAULT: 64
norm         normalization layer in blocks. TYPE: Optional[nn.Cell]. DEFAULT: None
+ Source code in mindcv/models/resnet.py, lines 163-301
class ResNet(nn.Cell):
+    r"""ResNet model class, based on
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+    Args:
+        block: block of resnet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        if norm is None:
+            norm = nn.BatchNorm2d
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64
+        self.groups = groups
+        self.base_width = base_width
+
+        self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.input_channels, reduction=2, name="relu")]
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name="layer1"))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name="layer2"))
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name="layer3"))
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name="layer4"))
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_width,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    norm=self.norm
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
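The factory functions documented further down only fix block and layers; the class can also be instantiated directly. A sketch, under the assumption that BasicBlock and Bottleneck are importable from the same module (they appear in the signature above):

# Sketch: constructing ResNet directly instead of via a registered factory.
from mindcv.models.resnet import ResNet, BasicBlock, Bottleneck

# BasicBlock with [2, 2, 2, 2] matches the resnet18 configuration,
# Bottleneck with [3, 4, 6, 3] matches resnet50.
resnet18_like = ResNet(block=BasicBlock, layers=[2, 2, 2, 2], num_classes=1000)
resnet50_like = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=1000)

# num_features is 512 * block.expansion: 512 for BasicBlock, 2048 for Bottleneck.
print(resnet18_like.num_features, resnet50_like.num_features)  # 512 2048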
+ + + + + + + + + + +
+ + + +

+mindcv.models.resnet.ResNet.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/resnet.py, lines 280-291
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.max_pool(x)
+
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x
+
+
+
+ +
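forward_features returns the stride-32 feature map produced by layer4, before global pooling and the classifier, which makes the model usable as a plain backbone. A sketch (resnet50 is documented later on this page):

# Sketch: extracting backbone features with forward_features().
import numpy as np
import mindspore as ms
from mindcv.models.resnet import resnet50

net = resnet50(pretrained=False)
net.set_train(False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)

feat = net.forward_features(x)  # conv1 -> maxpool -> layer1..layer4, no pooling/classifier
print(feat.shape)               # (1, 2048, 7, 7) for a 224x224 input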
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNet model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 341-349
@register_model
+def resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 152 layers ResNet model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 352-360
@register_model
+def resnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 152 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet152"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 18 layers ResNet model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 308-316
@register_model
+def resnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 18 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet18"]
+    model_args = dict(block=BasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 34 layers ResNet model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 319-327
@register_model
+def resnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 34 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet34"]
+    model_args = dict(block=BasicBlock, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNet model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 330-338
@register_model
+def resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
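All resnet/resnext factories on this page share the signature shown above and forward num_classes and in_channels to the ResNet constructor. A sketch of adapting resnet50 to a 10-class, single-channel task:

# Sketch: num_classes resizes the Dense classifier, in_channels changes conv1's input depth.
from mindcv.models.resnet import resnet50

net = resnet50(pretrained=False, num_classes=10, in_channels=1)
print(net.classifier.weight.shape)  # (10, 2048): nn.Dense stores (out_channels, in_channels)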
+ + +
+ + + +

+mindcv.models.resnet.resnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNeXt model with 32 groups of GPConv. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 374-382
@register_model
+def resnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext101_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNeXt model with 64 groups of GPConv. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 385-393
@register_model
+def resnext101_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 64 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext152_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnet.py, lines 396-401
@register_model
+def resnext152_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnext152_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNeXt model with 32 groups of GPConv. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py, lines 363-371
@register_model
+def resnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext50_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
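The ResNeXt variants reuse the same ResNet class; only groups and base_width change, which turns the 3x3 convolution in each Bottleneck into a grouped convolution. A sketch of the correspondence (equivalence up to the pretrained/default_cfg handling inside _create_resnet is an assumption here):

# Sketch: resnext50_32x4d is ResNet(Bottleneck, [3, 4, 6, 3]) with groups=32, base_width=4.
from mindcv.models.resnet import ResNet, Bottleneck, resnext50_32x4d

manual = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], groups=32, base_width=4)
factory = resnext50_32x4d(pretrained=False)
print(manual.num_features, factory.num_features)  # both 2048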

resnetv2

+ + + +
+ + + +

+mindcv.models.resnetv2.resnetv2_101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNetV2 model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnetv2.py, lines 108-119
@register_model
+def resnetv2_101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnetv2_101"]
+    model = ResNet(PreActBottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnetv2.resnetv2_50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNetV2 model. +Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnetv2.py, lines 94-105
@register_model
+def resnetv2_50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs['resnetv2_50']
+    model = ResNet(PreActBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
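Unlike the resnet factories, the resnetv2 factories construct the model and call load_pretrained themselves, swapping in a pre-activation bottleneck block. Usage is otherwise identical; a sketch:

# Sketch: ResNetV2 (pre-activation) factory usage.
import numpy as np
import mindspore as ms
from mindcv.models.resnetv2 import resnetv2_50

net = resnetv2_50(pretrained=False, num_classes=1000)
net.set_train(False)
x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (2, 1000)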

rexnet

+ + +
+ + + +

+ mindcv.models.rexnet.ReXNetV1 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ReXNet model class, based on +"Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of the input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
fi_channels +
+

number of the final channels. Default: 180.

+
+

+ + TYPE: + int + + + DEFAULT: + 180 + +

+
initial_channels +
+

initialize inplanes. Default: 16.

+
+

+ + TYPE: + int + + + DEFAULT: + 16 + +

+
width_mult +
+

Width multiplier that scales the number of channels. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
depth_mult +
+

Depth multiplier that scales the number of layers. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
use_se +
+

use SENet in LinearBottleneck. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
se_ratio +
+

SENet reduction ratio. Default: 1/12.

+
+

+ + DEFAULT: + 1 / 12 + +

+
drop_rate +
+

dropout ratio. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
ch_div +
+

Round channel numbers to be divisible by ch_div. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
act_layer +
+

activation function in ConvNormAct. Default: nn.SiLU.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.SiLU + +

+
dw_act_layer +
+

activation function after dw_conv. Default: nn.ReLU6.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.ReLU6 + +

+
cls_useconv +
+

use conv in classification. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/rexnet.py, lines 106-252
class ReXNetV1(nn.Cell):
+    r"""ReXNet model class, based on
+    `"Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>`_
+
+    Args:
+        in_channels (int): number of the input channels. Default: 3.
+        fi_channels (int): number of the final channels. Default: 180.
+        initial_channels (int): initialize inplanes. Default: 16.
+        width_mult (float): width multiplier that scales the number of channels. Default: 1.0.
+        depth_mult (float): depth multiplier that scales the number of layers. Default: 1.0.
+        num_classes (int): number of classification classes. Default: 1000.
+        use_se (bool): use SENet in LinearBottleneck. Default: True.
+        se_ratio (float): SENet reduction ratio. Default: 1/12.
+        drop_rate (float): dropout ratio. Default: 0.2.
+        ch_div (int): round channel numbers to be divisible by ch_div. Default: 1.
+        act_layer (nn.Cell): activation function in ConvNormAct. Default: nn.SiLU.
+        dw_act_layer (nn.Cell): activation function after dw_conv. Default: nn.ReLU6.
+        cls_useconv (bool): use conv in classification. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels=3,
+        fi_channels=180,
+        initial_channels=16,
+        width_mult=1.0,
+        depth_mult=1.0,
+        num_classes=1000,
+        use_se=True,
+        se_ratio=1 / 12,
+        drop_rate=0.2,
+        drop_path_rate=0.0,
+        ch_div=1,
+        act_layer=nn.SiLU,
+        dw_act_layer=nn.ReLU6,
+        cls_useconv=False,
+    ):
+        super(ReXNetV1, self).__init__()
+
+        layers = [1, 2, 2, 3, 3, 5]
+        strides = [1, 2, 2, 2, 1, 2]
+        use_ses = [False, False, True, True, True, True]
+
+        layers = [ceil(element * depth_mult) for element in layers]
+        strides = sum([[element] + [1] * (layers[idx] - 1)
+                       for idx, element in enumerate(strides)], [])
+        if use_se:
+            use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
+        else:
+            use_ses = [False] * sum(layers[:])
+        exp_ratios = [1] * layers[0] + [6] * sum(layers[1:])
+
+        self.depth = sum(layers[:]) * 3
+        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
+        inplanes = initial_channels / width_mult if width_mult < 1.0 else initial_channels
+
+        features = []
+        in_channels_group = []
+        out_channels_group = []
+
+        for i in range(self.depth // 3):
+            if i == 0:
+                in_channels_group.append(int(round(stem_channel * width_mult)))
+                out_channels_group.append(int(round(inplanes * width_mult)))
+            else:
+                in_channels_group.append(int(round(inplanes * width_mult)))
+                inplanes += fi_channels / (self.depth // 3 * 1.0)
+                out_channels_group.append(int(round(inplanes * width_mult)))
+
+        stem_chs = make_divisible(round(stem_channel * width_mult), divisor=ch_div)
+        self.stem = Conv2dNormActivation(in_channels, stem_chs, stride=2, padding=1, activation=act_layer)
+
+        feat_chs = [stem_chs]
+        self.feature_info = []
+        curr_stride = 2
+        features = []
+        num_blocks = len(in_channels_group)
+        for block_idx, (in_c, out_c, exp_ratio, stride, use_se) in enumerate(
+            zip(in_channels_group, out_channels_group, exp_ratios, strides, use_ses)
+        ):
+            if stride > 1:
+                fname = "stem" if block_idx == 0 else f"features.{block_idx - 1}"
+                self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=fname)]
+            block_dpr = drop_path_rate * block_idx / (num_blocks - 1)  # stochastic depth linear decay rule
+            drop_path = DropPath(block_dpr) if block_dpr > 0. else None
+            features.append(LinearBottleneck(in_channels=in_c,
+                                             out_channels=out_c,
+                                             exp_ratio=exp_ratio,
+                                             stride=stride,
+                                             use_se=use_se,
+                                             se_ratio=se_ratio,
+                                             act_layer=act_layer,
+                                             dw_act_layer=dw_act_layer,
+                                             drop_path=drop_path))
+            curr_stride *= stride
+            feat_chs.append(out_c)
+
+        pen_channels = make_divisible(int(1280 * width_mult), divisor=ch_div)
+        self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=f'features.{len(features) - 1}')]
+        self.flatten_sequential = True
+        features.append(Conv2dNormActivation(out_channels_group[-1],
+                                             pen_channels,
+                                             kernel_size=1,
+                                             activation=act_layer))
+
+        features.append(GlobalAvgPooling(keep_dims=True))
+        self.useconv = cls_useconv
+        self.features = nn.SequentialCell(*features)
+        if self.useconv:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Conv2d(pen_channels, num_classes, 1, has_bias=True))
+        else:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Dense(pen_channels, num_classes))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Conv2d, nn.Dense)):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         [1, cell.bias.shape[0]], cell.bias.dtype).reshape((-1)))
+
+    def forward_features(self, x):
+        x = self.stem(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x):
+        if not self.useconv:
+            x = x.reshape((x.shape[0], -1))
+            x = self.cls(x)
+        else:
+            x = self.cls(x).reshape((x.shape[0], -1))
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
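In ReXNetV1, width_mult scales every stage's channel count (including the penultimate 1280-channel 1x1 conv) while depth_mult scales the per-stage block counts, so the two knobs trade width against depth independently. A sketch comparing two widths (counting parameters via trainable_params is an illustrative idiom, not part of this API):

# Sketch: width_mult controls model capacity.
from mindcv.models.rexnet import ReXNetV1

def n_params(net):
    # total number of trainable parameter elements
    return sum(int(p.size) for p in net.trainable_params())

base = ReXNetV1(width_mult=1.0, num_classes=1000)
wide = ReXNetV1(width_mult=1.3, num_classes=1000)
print(n_params(base), n_params(wide))  # the 1.3x model is noticeably larger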
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_09(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 0.9. +Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py, lines 269-274
@register_model
+def rexnet_09(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 0.9.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_09", 0.9, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.0. +Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py, lines 277-282
@register_model
+def rexnet_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_10", 1.0, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_13(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.3. +Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py, lines 285-290
@register_model
+def rexnet_13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.3.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_13", 1.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.5. +Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py, lines 293-298
@register_model
+def rexnet_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.5.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_15", 1.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 2.0. +Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py, lines 301-306
@register_model
+def rexnet_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 2.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_20", 2.0, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +

senet

+ + +
+ + + +

+ mindcv.models.senet.SENet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SENet model class, based on +"Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block class of SENet.

+
+

+ + TYPE: + Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]] + +

+
layers +
+

Number of residual blocks for 4 layers.

+
+

+ + TYPE: + List[int] + +

+
group +
+

Number of groups for the conv in each bottleneck block.

+
+

+ + TYPE: + int + +

+
reduction +
+

Reduction ratio for Squeeze-and-Excitation modules.

+
+

+ + TYPE: + int + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
inplanes +
+

Number of input channels for layer1. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
input3x3 +
+

If True, use three 3x3 convolutions in layer0. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
downsample_kernel_size +
+

Kernel size for downsampling convolutions. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
downsample_padding +
+

Padding for downsampling convolutions. Default: 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/models/senet.py, lines 234-384
class SENet(nn.Cell):
+    r"""SENet model class, based on
+    `"Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>`_
+
+    Args:
+        block: block class of SENet.
+        layers: Number of residual blocks for 4 layers.
+        group: Number of groups for the conv in each bottleneck block.
+        reduction: Reduction ratio for Squeeze-and-Excitation modules.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        in_channels: number of channels of the input. Default: 3.
+        inplanes: Number of input channels for layer1. Default: 64.
+        input3x3: If `True`, use three 3x3 convolutions in layer0. Default: False.
+        downsample_kernel_size: Kernel size for downsampling convolutions. Default: 1.
+        downsample_padding: Padding for downsampling convolutions. Default: 0.
+        num_classes (int): number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        layers: List[int],
+        group: int,
+        reduction: int,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        inplanes: int = 64,
+        input3x3: bool = False,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+        num_classes: int = 1000,
+    ) -> None:
+        super(SENet, self).__init__()
+        self.inplanes = inplanes
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        if input3x3:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, 3, stride=2, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, 64, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, inplanes, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        else:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, inplanes, kernel_size=7, stride=2, pad_mode="pad",
+                          padding=3, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        self.pool0 = nn.MaxPool2d(3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], group=group,
+                                       reduction=reduction, downsample_kernel_size=1,
+                                       downsample_padding=0)
+
+        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.num_features = 512 * block.expansion
+
+        self.pool = GlobalAvgPooling()
+        if self.drop_rate > 0.:
+            self.dropout = Dropout(p=self.drop_rate)
+        self.classifier = nn.Dense(self.num_features, self.num_classes)
+
+        self._initialize_weights()
+
+    def _make_layer(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        planes: int,
+        blocks: int,
+        group: int,
+        reduction: int,
+        stride: int = 1,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.SequentialCell([
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size,
+                          stride=stride, pad_mode="pad", padding=downsample_padding, has_bias=False),
+                nn.BatchNorm2d(planes * block.expansion)
+            ])
+
+        layers = [block(self.inplanes, planes, group, reduction, stride, downsample)]
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes, group, reduction))
+
+        return nn.SequentialCell(layers)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_in", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.layer0(x)
+        x = self.pool0(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
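The seresnet/seresnext factories below simply pick a block type, layers, group, and reduction for this class. Constructing an SE-ResNet-50-style network by hand looks like the following sketch (SEResNetBottleneck is assumed importable from the same module, as the signature above suggests):

# Sketch: building an SE-ResNet-50 configuration directly from the SENet class.
from mindcv.models.senet import SENet, SEResNetBottleneck

net = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,
            drop_rate=0.2, num_classes=1000)  # drop_rate > 0 enables Dropout before the classifier
print(net.num_features)  # 512 * block.expansion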
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.senet.senet154(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 387-397
@register_model
+def senet154(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["senet154"]
+    model = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], group=64, reduction=16,
+                  downsample_kernel_size=3, downsample_padding=1,  inplanes=128, input3x3=True,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 436-445
@register_model
+def seresnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet101"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 448-457
@register_model
+def seresnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet152"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 400-409
@register_model
+def seresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet18"]
+    model = SENet(block=SEResNetBlock, layers=[2, 2, 2, 2], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 412-421
@register_model
+def seresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet34"]
+    model = SENet(block=SEResNetBlock, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 424-433
@register_model
+def seresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet50"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
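Because every factory in this family forwards **kwargs to the SENet constructor, options such as drop_rate can be passed straight through; a short sketch:

# Sketch: extra keyword arguments reach SENet unchanged.
from mindcv.models.senet import seresnet50

net = seresnet50(pretrained=False, num_classes=100, drop_rate=0.2)
net.set_train(False)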
+ + +
+ + + +

+mindcv.models.senet.seresnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 484-493
@register_model
+def seresnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext101_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext26_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 460-469
@register_model
+def seresnext26_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext26_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[2, 2, 2, 2], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py, lines 472-481
@register_model
+def seresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext50_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

shufflenetv1

+ + +
+ + + +

+ mindcv.models.shufflenetv1.ShuffleNetV1 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV1 model class, based on +"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" <https://arxiv.org/abs/1707.01083>`_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '2.0x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '2.0x' + +

+
group +
+

number of groups for group convolution. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 119-223
class ShuffleNetV1(nn.Cell):
+    r"""ShuffleNetV1 model class, based on
+    `"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" <https://arxiv.org/abs/1707.01083>`_  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '2.0x'.
+        group: number of groups for group convolution. Default: 3.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "2.0x",
+        group: int = 3,
+    ):
+        super().__init__()
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if group == 3:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 12, 120, 240, 480]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 240, 480, 960]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 360, 720, 1440]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 480, 960, 1920]
+            else:
+                raise NotImplementedError
+        elif group == 8:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 16, 192, 384, 768]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 384, 768, 1536]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
+            else:
+                raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell(
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        )
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                stride = 2 if i == 0 else 1
+                first_group = idxstage == 0 and i == 0
+                features.append(ShuffleV1Block(input_channel, output_channel,
+                                               group=group, first_group=first_group,
+                                               mid_channels=output_channel // 4, stride=stride))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(features)
+        self.global_pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
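group and model_size jointly select a row of the channel table above; only group 3 or 8 combined with the four listed scale factors is defined, and anything else raises NotImplementedError. A sketch:

# Sketch: the (group, model_size) pair picks the stage channel widths.
from mindcv.models.shufflenetv1 import ShuffleNetV1

net = ShuffleNetV1(group=3, model_size="1.0x", num_classes=1000)
print(net.stage_out_channels)  # [-1, 24, 240, 480, 960]

try:
    ShuffleNetV1(group=3, model_size="3.0x")  # not in the table
except NotImplementedError:
    print("only 0.5x / 1.0x / 1.5x / 2.0x are defined for group=3")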
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 226-237
@register_model
+def shufflenet_v1_g3_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_05"]
+    model = ShuffleNetV1(group=3, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 240-251
@register_model
+def shufflenet_v1_g3_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_10"]
+    model = ShuffleNetV1(group=3, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 254-265
@register_model
+def shufflenet_v1_g3_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_15"]
+    model = ShuffleNetV1(group=3, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 268-279
@register_model
+def shufflenet_v1_g3_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_20"]
+    model = ShuffleNetV1(group=3, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 282-293
@register_model
+def shufflenet_v1_g8_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_05"]
+    model = ShuffleNetV1(group=8, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 296-307
@register_model
+def shufflenet_v1_g8_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_10"]
+    model = ShuffleNetV1(group=8, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py, lines 310-321
@register_model
+def shufflenet_v1_g8_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_15"]
+    model = ShuffleNetV1(group=8, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py +
(lines 324-335)
@register_model
+def shufflenet_v1_g8_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_20"]
+    model = ShuffleNetV1(group=8, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
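All of the shufflenet_v1_* factories above follow the same pattern: look up a default configuration, build a ShuffleNetV1 with the requested group count and width multiplier, and optionally load pretrained weights. A minimal usage sketch, assuming the variants are registered under their function names and using a random tensor purely for illustration:

import numpy as np
import mindspore as ms
import mindcv

# Build the 2.0x width, 3-group variant through the model registry (random init).
model = mindcv.create_model("shufflenet_v1_g3_20", pretrained=False, num_classes=1000)
model.set_train(False)

# Dummy ImageNet-sized batch: N x 3 x 224 x 224.
x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
logits = model(x)
print(logits.shape)  # expected (1, 1000)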

shufflenetv2

+ + +
+ + + +

+ mindcv.models.shufflenetv2.ShuffleNetV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV2 model class, based on +"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/abs/1807.11164>_

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '1.5x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1.5x' + +

+
+ +
+ Source code in mindcv/models/shufflenetv2.py +
(lines 117-217)
class ShuffleNetV2(nn.Cell):
+    r"""ShuffleNetV2 model class, based on
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/abs/1807.11164>`_
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '1.5x'.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "1.5x",
+    ):
+        super().__init__()
+
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if model_size == "0.5x":
+            self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif model_size == "1.0x":
+            self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif model_size == "1.5x":
+            self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif model_size == "2.0x":
+            self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
+        else:
+            raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2,
+                      pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        ])
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                if i == 0:
+                    self.features.append(ShuffleV2Block(input_channel, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=2))
+                else:
+                    self.features.append(ShuffleV2Block(input_channel // 2, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=1))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(self.features)
+
+        self.conv_last = nn.SequentialCell([
+            nn.Conv2d(input_channel, self.stage_out_channels[-1], kernel_size=1, stride=1),
+            nn.BatchNorm2d(self.stage_out_channels[-1]),
+            nn.ReLU()
+        ])
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.conv_last(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
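As the constructor shows, model_size selects one of four fixed stage_out_channels tables ("0.5x", "1.0x", "1.5x", "2.0x") and any other value raises NotImplementedError. A short sketch instantiating the class directly, with a dummy input chosen only for illustration:

import numpy as np
import mindspore as ms
from mindcv.models.shufflenetv2 import ShuffleNetV2

net = ShuffleNetV2(model_size="1.0x", num_classes=10, in_channels=3)
print(net.stage_out_channels)  # [-1, 24, 116, 232, 464, 1024] for "1.0x"

x = ms.Tensor(np.random.rand(2, 3, 224, 224), ms.float32)
out = net(x)  # construct -> forward_features -> forward_head
print(out.shape)  # (2, 10)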

+mindcv.models.shufflenetv2.shufflenet_v2_x0_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 0.5. +Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py +
(lines 220-231)
@register_model
+def shufflenet_v2_x0_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 0.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x0_5"]
+    model = ShuffleNetV2(model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.0. +Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py +
(lines 234-245)
@register_model
+def shufflenet_v2_x1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_0"]
+    model = ShuffleNetV2(model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.5. +Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py +
(lines 248-259)
@register_model
+def shufflenet_v2_x1_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_5"]
+    model = ShuffleNetV2(model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x2_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 2.0. +Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py +
(lines 262-273)
@register_model
+def shufflenet_v2_x2_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 2.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x2_0"]
+    model = ShuffleNetV2(model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
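The four shufflenet_v2_x* helpers differ only in the model_size string they pass; num_classes, in_channels and any extra keyword arguments are forwarded to the ShuffleNetV2 constructor. A brief registry-based sketch (the argument values are illustrative):

import mindcv

small = mindcv.create_model("shufflenet_v2_x0_5", num_classes=100)
gray = mindcv.create_model("shufflenet_v2_x2_0", in_channels=1)  # e.g. single-channel input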

sknet

+ + +
+ + + +

+ mindcv.models.sknet.SKNet + + +

+ + +
+

+ Bases: ResNet

+ + +

SKNet model class, based on +"Selective Kernel Networks" <https://arxiv.org/abs/1903.06586>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block of sknet.

+
+

+ + TYPE: + Type[nn.Cell] + +

+
layers +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width of per-group hidden channels in blocks. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
sk_kwargs +
+

kwargs of selective kernel. Default: None.

+
+

+ + TYPE: + Optional[Dict] + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/sknet.py +
(lines 144-215)
class SKNet(ResNet):
+    r"""SKNet model class, based on
+    `"Selective Kernel Networks" <https://arxiv.org/abs/1903.06586>`_
+
+    Args:
+        block: block of sknet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+        sk_kwargs: kwargs of selective kernel. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+        sk_kwargs: Optional[Dict] = None,
+    ) -> None:
+        self.sk_kwargs: Optional[Dict] = sk_kwargs  # make pylint happy
+        super().__init__(block, layers, num_classes, in_channels, groups, base_width, norm)
+
+    def _make_layer(
+        self,
+        block: Type[Union[SelectiveKernelBasic, SelectiveKernelBottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+                sk_kwargs=self.sk_kwargs,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm,
+                    sk_kwargs=self.sk_kwargs,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
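SKNet reuses the ResNet skeleton and only overrides _make_layer so that every block also receives sk_kwargs, the selective-kernel configuration. The factories below are the usual entry point; the sketch here builds the same network as skresnet18 by hand, just to make the wiring explicit (argument values copied from that factory):

from mindcv.models.sknet import SKNet, SelectiveKernelBasic

# Basic selective-kernel blocks, [2, 2, 2, 2] stages, as in skresnet18.
sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
net = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=1000, sk_kwargs=sk_kwargs)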

+mindcv.models.sknet.skresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 18 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py +
(lines 218-231)
@register_model
+def skresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 18 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet18"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 34 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py +
(lines 234-247)
@register_model
+def skresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 34 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet34"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py +
(lines 250-263)
@register_model
+def skresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet50"]
+    sk_kwargs = dict(split_input=True)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers SKNeXt model with 32 groups of GPConv. +Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py +
(lines 266-279)
@register_model
+def skresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnext50_32x4d"]
+    sk_kwargs = dict(rd_ratio=1 / 16, rd_divisor=32, split_input=False)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

squeezenet

+ + +
+ + + +

+ mindcv.models.squeezenet.SqueezeNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeNet model class, based on +"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" <https://arxiv.org/abs/1602.07360>_ # noqa: E501

+

.. note:: + Important: In contrast to the other models, SqueezeNet expects tensors with a size of + N x 3 x 227 x 227, so ensure your images are sized accordingly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
version +
+

version of the architecture, '1_0' or '1_1'. Default: '1_0'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1_0' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
drop_rate +
+

dropout rate of the classifier. Default: 0.5.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/squeezenet.py +
(lines 61-150)
class SqueezeNet(nn.Cell):
+    r"""SqueezeNet model class, based on
+    `"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" <https://arxiv.org/abs/1602.07360>`_  # noqa: E501
+
+    .. note::
+        **Important**: In contrast to the other models, SqueezeNet expects tensors with a size of
+        N x 3 x 227 x 227, so ensure your images are sized accordingly.
+
+    Args:
+        version: version of the architecture, '1_0' or '1_1'. Default: '1_0'.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+        in_channels: number of channels of the input. Default: 3.
+    """
+
+    def __init__(
+        self,
+        version: str = "1_0",
+        num_classes: int = 1000,
+        drop_rate: float = 0.5,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+        if version == "1_0":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 96, kernel_size=7, stride=2, pad_mode="valid", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(96, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                Fire(128, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 32, 128, 128),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(512, 64, 256, 256),
+            ])
+        elif version == "1_1":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(64, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(128, 32, 128, 128),
+                Fire(256, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                Fire(512, 64, 256, 256),
+            ])
+        else:
+            raise ValueError(f"Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected")
+
+        self.final_conv = nn.Conv2d(512, num_classes, kernel_size=1, has_bias=True)
+        self.classifier = nn.SequentialCell([
+            Dropout(p=drop_rate),
+            self.final_conv,
+            nn.ReLU(),
+            GlobalAvgPooling()
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if cell is self.final_conv:
+                    cell.weight.set_data(init.initializer(init.Normal(), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
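The note above is the practical point: the stem and pooling layout was designed around N x 3 x 227 x 227 inputs, and the classifier ends in a 1x1 convolution followed by global average pooling rather than a fully connected layer. A minimal sketch with a correctly sized dummy batch:

import numpy as np
import mindspore as ms
from mindcv.models.squeezenet import SqueezeNet

net = SqueezeNet(version="1_1", num_classes=1000, drop_rate=0.5)
x = ms.Tensor(np.random.rand(1, 3, 227, 227), ms.float32)
print(net(x).shape)  # (1, 1000)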

+mindcv.models.squeezenet.squeezenet1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.0. +Refer to the base class models.SqueezeNet for more details.

+ +
+ Source code in mindcv/models/squeezenet.py +
(lines 153-164)
@register_model
+def squeezenet1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.0.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_0"]
+    model = SqueezeNet(version="1_0", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.squeezenet.squeezenet1_1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.1. +Refer to the base class models.SqueezeNet for more details.

+ +
+ Source code in mindcv/models/squeezenet.py +
(lines 167-178)
@register_model
+def squeezenet1_1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.1.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_1"]
+    model = SqueezeNet(version="1_1", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
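The two factories differ only in the version string: "1_0" uses the original 7x7 stem convolution, while "1_1" starts with a 3x3 stem and moves the pooling stages earlier (see the class source above). Extra keyword arguments such as drop_rate are forwarded to the constructor, so a sketch like the following should work:

import mindcv

net = mindcv.create_model("squeezenet1_1", num_classes=10, drop_rate=0.3)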

swintransformer

+ + +
+ + + +

+ mindcv.models.swintransformer.SwinTransformer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformer model class, based on +"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" <https://arxiv.org/pdf/2103.14030>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default: 224.

+
+

+ + TYPE: + int | tuple(int) + + + DEFAULT: + 224 + 

+
patch_size +
+

Patch size. Default: 4

+
+

+ + TYPE: + int | tuple(int) + + + DEFAULT: + 4 + 

+
in_chans +
+

Number of input image channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

Number of classes for classification head. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

Patch embedding dimension. Default: 96

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
depths +
+

Depth of each Swin Transformer layer.

+
+

+ + TYPE: + tuple(int) + + + DEFAULT: + None + 

+
num_heads +
+

Number of attention heads in different layers.

+
+

+ + TYPE: + tuple(int) + + + DEFAULT: + None + 

+
window_size +
+

Window size. Default: 7

+
+

+ + TYPE: + int + + + DEFAULT: + 7 + +

+
mlp_ratio +
+

Ratio of mlp hidden dim to embedding dim. Default: 4

+
+

+ + TYPE: + float + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

If True, add a learnable bias to query, key, value. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
qk_scale +
+

Override default qk scale of head_dim ** -0.5 if set. Default: None

+
+

+ + TYPE: + float + + + DEFAULT: + None + +

+
drop_rate +
+

Dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

Attention dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

Stochastic depth rate. Default: 0.1

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
norm_layer +
+

Normalization layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
ape +
+

If True, add absolute position embedding to the patch embedding. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
patch_norm +
+

If True, add normalization after patch embedding. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ Source code in mindcv/models/swintransformer.py +
(lines 567-696)
class SwinTransformer(nn.Cell):
+    r"""SwinTransformer model class, based on
+    `"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" <https://arxiv.org/pdf/2103.14030>`_
+
+    Args:
+        image_size (int | tuple(int)): Input image size. Default: 224
+        patch_size (int | tuple(int)): Patch size. Default: 4
+        in_chans (int): Number of input image channels. Default: 3
+        num_classes (int): Number of classes for classification head. Default: 1000
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Cell): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: Optional[List[int]] = None,
+        num_heads: Optional[List[int]] = None,
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        qk_scale: Optional[int] = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: Optional[nn.Cell] = nn.LayerNorm,
+        ape: bool = False,
+        patch_norm: bool = True,
+    ) -> None:
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = Parameter(Tensor(np.zeros((1, num_patches, embed_dim)), dtype=mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
+                               input_resolution=(patches_resolution[0] // (2 ** i_layer),
+                                                 patches_resolution[1] // (2 ** i_layer)),
+                               depth=depths[i_layer],
+                               num_heads=num_heads[i_layer],
+                               window_size=window_size,
+                               mlp_ratio=self.mlp_ratio,
+                               qkv_bias=qkv_bias, qk_scale=qk_scale,
+                               drop=drop_rate, attn_drop=attn_drop_rate,
+                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                               norm_layer=norm_layer,
+                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None)
+            self.layers.append(layer)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-5)
+        self.classifier = nn.Dense(in_channels=self.num_features,
+                                   out_channels=num_classes, has_bias=True) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+
+    def no_weight_decay(self) -> None:
+        return {"absolute_pos_embed"}
+
+    def no_weight_decay_keywords(self) -> None:
+        return {"relative_position_bias_table"}
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = ops.mean(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
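Note that depths and num_heads default to None, so they must be supplied explicitly; the swin_tiny factory below shows the standard configuration (patch size 4, window size 7, embed_dim 96). A direct-construction sketch mirroring those values, with a dummy 224 x 224 batch for illustration:

import numpy as np
import mindspore as ms
from mindcv.models.swintransformer import SwinTransformer

net = SwinTransformer(image_size=224, patch_size=4, embed_dim=96,
                      depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7)
x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)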

+mindcv.models.swintransformer.swin_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SwinTransformer tiny model. +Refer to the base class 'models.SwinTransformer' for more details.

+ +
+ Source code in mindcv/models/swintransformer.py +
(lines 699-714)
@register_model
+def swin_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SwinTransformer:
+    """Get SwinTransformer tiny model.
+    Refer to the base class 'models.SwinTransformer' for more details.
+    """
+    default_cfg = default_cfgs["swin_tiny"]
+    model = SwinTransformer(image_size=224, patch_size=4, in_chans=in_channels, num_classes=num_classes,
+                            embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7,
+                            mlp_ratio=4., qkv_bias=True, qk_scale=None,
+                            drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2,
+                            norm_layer=nn.LayerNorm, ape=False, patch_norm=True, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

swintransformerv2

+ + +
+ + + +

+ mindcv.models.swintransformerv2.SwinTransformerV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformerV2 model class, based on +"Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default: 256.

+
+

+ + TYPE: + int + + + DEFAULT: + 256 + +

+
patch_size +
+

Patch size. Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
in_channels +
+

Number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

Number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

Patch embedding dimension. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
depths +
+

Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [2, 2, 6, 2] + +

+
num_heads +
+

Number of attention heads in different layers. Default: [3, 6, 12, 24].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [3, 6, 12, 24] + +

+
window_size +
+

Window size. Default: 7.

+
+

+ + TYPE: + int + + + DEFAULT: + 7 + +

+
mlp_ratio +
+

Ratio of mlp hidden dim to embedding dim. Default: 4.

+
+

+ + TYPE: + float + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

If True, add a bias for query, key, value. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

Attention drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

Stochastic depth rate. Default: 0.1.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
norm_layer +
+

Normalization layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
patch_norm +
+

If True, add normalization after patch embedding. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
pretrained_window_sizes +
+

Pretrained window sizes of each layer. Default: [0, 0, 0, 0].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [0, 0, 0, 0] + +

+
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 521-649)
class SwinTransformerV2(nn.Cell):
+    r"""SwinTransformerV2 model class, based on
+    `"Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>`_
+
+    Args:
+        image_size: Input image size. Default: 256.
+        patch_size: Patch size. Default: 4.
+        in_channels: Number of channels of the input. Default: 3.
+        num_classes: Number of classification classes. Default: 1000.
+        embed_dim: Patch embedding dimension. Default: 96.
+        depths: Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].
+        num_heads: Number of attention heads in different layers. Default: [3, 6, 12, 24].
+        window_size: Window size. Default: 7.
+        mlp_ratio: Ratio of mlp hidden dim to embedding dim. Default: 4.
+        qkv_bias: If True, add a bias for query, key, value. Default: True.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        attn_drop_rate: Attention drop probability for the Dropout layer. Default: 0.
+        drop_path_rate: Stochastic depth rate. Default: 0.1.
+        norm_layer: Normalization layer. Default: nn.LayerNorm.
+        patch_norm: If True, add normalization after patch embedding. Default: True.
+        pretrained_window_sizes: Pretrained window sizes of each layer. Default: [0, 0, 0, 0].
+    """
+
+    def __init__(
+        self,
+        image_size: int = 256,
+        patch_size: int = 4,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: List[int] = [2, 2, 6, 2],
+        num_heads: List[int] = [3, 6, 12, 24],
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: nn.Cell = nn.LayerNorm,
+        patch_norm: bool = True,
+        pretrained_window_sizes: List[int] = [0, 0, 0, 0],
+    ) -> None:
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.in_channels = in_channels
+        self.patch_size = patch_size
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        self.num_patches = num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        self.final_seq = num_patches  # downsample seq_length
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(
+                dim=int(embed_dim * 2**i_layer),
+                input_resolution=(patches_resolution[0] // (2**i_layer),
+                                  patches_resolution[1] // (2**i_layer)),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                norm_layer=norm_layer,
+                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
+                pretrained_window_size=pretrained_window_sizes[i_layer]
+            )
+            # downsample seq_length
+            if i_layer < self.num_layers - 1:
+                self.final_seq = self.final_seq // 4
+            self.layers.append(layer)
+        self.head = nn.Dense(self.num_features, self.num_classes)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-6)
+        self.avgpool = ops.ReduceMean(keep_dims=False)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = self.avgpool(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
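Unlike the v1 class, SwinTransformerV2 defaults image_size to 256, keeps per-layer pretrained_window_sizes for fine-tuning at a new resolution, and names its classification layer head. A registry-based sketch using one of the variants defined below, with a 256 x 256 dummy batch:

import numpy as np
import mindspore as ms
import mindcv

net = mindcv.create_model("swinv2_tiny_window8")
x = ms.Tensor(np.random.rand(1, 3, 256, 256), ms.float32)
print(net(x).shape)  # (1, 1000)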

+mindcv.models.swintransformerv2.swinv2_base_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 717-727)
@register_model
+def swinv2_base_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_base_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 704-714)
@register_model
+def swinv2_base_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 691-701)
@register_model
+def swinv2_small_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 678-688)
@register_model
+def swinv2_small_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 665-675)
@register_model
+def swinv2_tiny_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 652-662)
@register_model
+def swinv2_tiny_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

vgg

+ + +
+ + + +

+ mindcv.models.vgg.VGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

VGGNet model class, based on +"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
model_name +
+

name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.

+
+

+ + TYPE: + str + +

+
batch_norm +
+

use batch normalization or not. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

dropout rate of the classifier. Default: 0.5.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
+ +
+ Source code in mindcv/models/vgg.py +
(lines 72-135)
class VGG(nn.Cell):
+    r"""VGGNet model class, based on
+    `"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+    Args:
+        model_name: name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
+        batch_norm: use batch normalization or not. Default: False.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        batch_norm: bool = False,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.5,
+    ) -> None:
+        super().__init__()
+        cfg = cfgs[model_name]
+        self.features = _make_layers(cfg, batch_norm=batch_norm, in_channels=in_channels)
+        self.flatten = nn.Flatten()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(512 * 7 * 7, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.flatten(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
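Because the classifier starts with nn.Dense(512 * 7 * 7, 4096), the feature map entering the head must be 7 x 7, which corresponds to 224 x 224 inputs after the five pooling stages. The factories below fix model_name and forward any remaining keyword arguments, so batch_norm and drop_rate can be passed through; a brief sketch:

import numpy as np
import mindspore as ms
import mindcv

net = mindcv.create_model("vgg13", batch_norm=True, drop_rate=0.5)
x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)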

+mindcv.models.vgg.vgg11(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 11 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 138-149)
@register_model
+def vgg11(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 11 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg11"]
+    model = VGG(model_name="vgg11", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg13(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 13 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 152-163)
@register_model
+def vgg13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 13 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg13"]
+    model = VGG(model_name="vgg13", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 16 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 166-177)
@register_model
+def vgg16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 16 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg16"]
+    model = VGG(model_name="vgg16", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg19(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 19 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 180-191)
@register_model
+def vgg19(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 19 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg19"]
+    model = VGG(model_name="vgg19", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
visformer
+
+ mindcv.models.visformer.Visformer
+
+ Bases: nn.Cell
+
Visformer model class, based on
"Visformer: The Vision-friendly Transformer" <https://arxiv.org/pdf/2104.12533.pdf>
PARAMETER        TYPE        DEFAULT   DESCRIPTION
img_size         int         224       input image size
init_channels    int         32        number of output channels of the stem
num_classes      int         1000      number of classification classes
embed_dim        int         384       embedding dimension in all heads
depth            List[int]   None      model block depth of each stage
num_heads        List[int]   None      number of attention heads of each stage
mlp_ratio        float       4.0       ratio of hidden features in Mlp
qkv_bias         bool        False     have bias in qkv layers or not
qk_scale         float       None      override the default qk scale of head_dim ** -0.5 if set
drop_rate        float       0.0       dropout rate
attn_drop_rate   float       0.0       attention layers dropout rate
drop_path_rate   float       0.1       drop path rate
attn_stage       str         '1111'    a block has an attention layer if its flag is '1'
pos_embed        bool        True      whether to use position embedding
spatial_conv     str         '1111'    a block has a spatial convolution layer if its flag is '1'
group            int         8         convolution group
pool             bool        True      if True, use global pooling
conv_init        bool        False     if True, initialize convolution weights with HeNormal; otherwise TruncatedNormal
+
+ Source code in mindcv/models/visformer.py (lines 210-436)
class Visformer(nn.Cell):
+    r"""Visformer model class, based on
+    '"Visformer: The Vision-friendly Transformer"
+    <https://arxiv.org/pdf/2104.12533.pdf>'
+
+    Args:
+        img_size (int): input image size. Default: 224.
+        init_channels (int): number of output channels of the stem. Default: 32.
+        num_classes (int): number of classification classes. Default: 1000.
+        embed_dim (int): embedding dimension in all heads. Default: 384.
+        depth (List[int]): model block depth of each stage. Default: None.
+        num_heads (List[int]): number of attention heads of each stage. Default: None.
+        mlp_ratio (float): ratio of hidden features in Mlp. Default: 4.
+        qkv_bias (bool): have bias in qkv layers or not. Default: False.
+        qk_scale (float): override default qk scale of head_dim ** -0.5 if set. Default: None.
+        drop_rate (float): dropout rate. Default: 0.
+        attn_drop_rate (float): attention layers dropout rate. Default: 0.
+        drop_path_rate (float): drop path rate. Default: 0.1.
+        attn_stage (str): a block has an attention layer if its flag is '1'. Default: '1111'.
+        pos_embed (bool): whether to use position embedding. Default: True.
+        spatial_conv (str): a block has a spatial convolution layer if its flag is '1'. Default: '1111'.
+        group (int): convolution group. Default: 8.
+        pool (bool): if True, use global pooling. Default: True.
+        conv_init (bool): if True, initialize convolution weights with HeNormal; otherwise TruncatedNormal. Default: False.
+    """
+
+    def __init__(
+        self,
+        img_size: int = 224,
+        init_channels: int = 32,
+        num_classes: int = 1000,
+        embed_dim: int = 384,
+        depth: List[int] = None,
+        num_heads: List[int] = None,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        qk_scale: float = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        attn_stage: str = "1111",
+        pos_embed: bool = True,
+        spatial_conv: str = "1111",
+        group: int = 8,
+        pool: bool = True,
+        conv_init: bool = False,
+    ) -> None:
+        super(Visformer, self).__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        self.init_channels = init_channels
+        self.img_size = img_size
+        self.pool = pool
+        self.conv_init = conv_init
+        self.depth = depth
+        assert (isinstance(depth, list) or isinstance(depth, tuple)) and len(depth) == 4
+        if not (isinstance(num_heads, list) or isinstance(num_heads, tuple)):
+            num_heads = [num_heads] * 4
+
+        self.pos_embed = pos_embed
+        dpr = np.linspace(0, drop_path_rate, sum(depth)).tolist()
+
+        self.stem = nn.SequentialCell([
+            nn.Conv2d(3, self.init_channels, 7, 2, pad_mode="pad", padding=3),
+            nn.BatchNorm2d(self.init_channels),
+            nn.ReLU()
+        ])
+        img_size //= 2
+
+        self.pos_drop = Dropout(p=drop_rate)
+        # stage0
+        if depth[0]:
+            self.patch_embed0 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 4)
+            img_size //= 2
+            if self.pos_embed:
+                self.pos_embed0 = mindspore.Parameter(
+                    ops.zeros((1, embed_dim // 4, img_size, img_size), mindspore.float32))
+            self.stage0 = nn.CellList([
+                Block(dim=embed_dim // 4, num_heads=num_heads[0], head_dim_ratio=0.25, mlp_ratio=mlp_ratio,
+                      qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                      group=group, attn_disabled=(attn_stage[0] == "0"), spatial_conv=(spatial_conv[0] == "1"))
+                for i in range(depth[0])
+            ])
+
+        # stage1
+        if depth[0]:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 4,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 2
+        else:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 4
+
+        if self.pos_embed:
+            self.pos_embed1 = mindspore.Parameter(ops.zeros((1, embed_dim // 2, img_size, img_size), mindspore.float32))
+
+        self.stage1 = nn.CellList([
+            Block(
+                dim=embed_dim // 2, num_heads=num_heads[1], head_dim_ratio=0.5, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[1] == "0"), spatial_conv=(spatial_conv[1] == "1")
+            )
+            for i in range(sum(depth[:1]), sum(depth[:2]))
+        ])
+
+        # stage2
+        self.patch_embed2 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 2, embed_dim=embed_dim)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed2 = mindspore.Parameter(ops.zeros((1, embed_dim, img_size, img_size), mindspore.float32))
+        self.stage2 = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads[2], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[2] == "0"), spatial_conv=(spatial_conv[2] == "1")
+            )
+            for i in range(sum(depth[:2]), sum(depth[:3]))
+        ])
+
+        # stage3
+        self.patch_embed3 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim, embed_dim=embed_dim * 2)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed3 = mindspore.Parameter(ops.zeros((1, embed_dim * 2, img_size, img_size), mindspore.float32))
+        self.stage3 = nn.CellList([
+            Block(
+                dim=embed_dim * 2, num_heads=num_heads[3], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[3] == "0"), spatial_conv=(spatial_conv[3] == "1")
+            )
+            for i in range(sum(depth[:3]), sum(depth[:4]))
+        ])
+
+        # head
+        if self.pool:
+            self.global_pooling = GlobalAvgPooling()
+
+        self.norm = nn.BatchNorm2d(embed_dim * 2)
+        self.head = nn.Dense(embed_dim * 2, num_classes)
+
+        # weight init
+        if self.pos_embed:
+            if depth[0]:
+                self.pos_embed0.set_data(initializer(TruncatedNormal(0.02),
+                                                     self.pos_embed0.shape, self.pos_embed0.dtype))
+            self.pos_embed1.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed1.shape, self.pos_embed1.dtype))
+            self.pos_embed2.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed2.shape, self.pos_embed2.dtype))
+            self.pos_embed3.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed3.shape, self.pos_embed3.dtype))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                if self.conv_init:
+                    cell.weight.set_data(initializer(HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape,
+                                                     cell.weight.dtype))
+                else:
+                    cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem(x)
+
+        # stage 0
+        if self.depth[0]:
+            x = self.patch_embed0(x)
+            if self.pos_embed:
+                x = x + self.pos_embed0
+                x = self.pos_drop(x)
+            for b in self.stage0:
+                x = b(x)
+
+        # stage 1
+        x = self.patch_embed1(x)
+        if self.pos_embed:
+            x = x + self.pos_embed1
+            x = self.pos_drop(x)
+        for b in self.stage1:
+            x = b(x)
+
+        # stage 2
+        x = self.patch_embed2(x)
+        if self.pos_embed:
+            x = x + self.pos_embed2
+            x = self.pos_drop(x)
+        for b in self.stage2:
+            x = b(x)
+
+        # stage 3
+        x = self.patch_embed3(x)
+        if self.pos_embed:
+            x = x + self.pos_embed3
+            x = self.pos_drop(x)
+        for b in self.stage3:
+            x = b(x)
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        # head
+        if self.pool:
+            x = self.global_pooling(x)
+        else:
+            x = x[:, :, 0, 0]
+        x = self.head(x.view(x.shape[0], -1))
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
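As the assert in __init__ shows, depth must be a 4-element list or tuple, and per-stage heads are normally passed as a list as well. A minimal sketch of constructing the class directly, reusing the visformer_tiny configuration listed further below; the input tensor and printed shape are illustrative:

import numpy as np
import mindspore as ms
from mindcv.models.visformer import Visformer

# configuration mirrors visformer_tiny below: 4-stage depth list, per-stage heads
net = Visformer(img_size=224, init_channels=16, embed_dim=192,
                depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3],
                mlp_ratio=4.0, group=8,
                attn_stage="0011", spatial_conv="1100", conv_init=True)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
print(net(x).shape)  # expected (1, 1000) with the default num_classes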
+
+mindcv.models.visformer.visformer_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get visformer small model.
Refer to the base class 'models.visformer' for more details.

+ Source code in mindcv/models/visformer.py (lines 468-479)
@register_model
+def visformer_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=384,
+                      depth=[0, 7, 4, 4], num_heads=[6, 6, 6, 6], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+mindcv.models.visformer.visformer_small_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get visformer small2 model.
Refer to the base class 'models.visformer' for more details.

+ Source code in mindcv/models/visformer.py (lines 482-493)
@register_model
+def visformer_small_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small_v2"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=256,
+                      depth=[1, 10, 14, 3], num_heads=[2, 4, 8, 16], mlp_ratio=4., qk_scale=-0.5,
+                      group=8, attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+mindcv.models.visformer.visformer_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get visformer tiny model.
Refer to the base class 'models.visformer' for more details.

+ Source code in mindcv/models/visformer.py (lines 439-451)
@register_model
+def visformer_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny"]
+    model = Visformer(img_size=224, init_channels=16, num_classes=num_classes, embed_dim=192,
+                      depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.visformer.visformer_tiny_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get visformer tiny2 model.
Refer to the base class 'models.visformer' for more details.

+ Source code in mindcv/models/visformer.py (lines 454-465)
@register_model
+def visformer_tiny_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny_v2"]
+    model = Visformer(img_size=224, init_channels=24, num_classes=num_classes, embed_dim=192,
+                      depth=[1, 4, 6, 3], num_heads=[1, 3, 6, 12], mlp_ratio=4., qk_scale=-0.5, group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
vit
+
+ mindcv.models.vit.ViT
+
+ Bases: nn.Cell
+
Vision Transformer architecture implementation.
PARAMETER            TYPE      DEFAULT       DESCRIPTION
image_size           int       224           Input image size.
input_channels       int       3             The number of input channels.
patch_size           int       16            Patch size of image.
embed_dim            int       768           The dimension of embedding.
num_layers           int       12            The depth of transformer.
num_heads            int       12            The number of attention heads.
mlp_dim              int       3072          The dimension of MLP hidden layer.
keep_prob            float     1.0           The keep rate, greater than 0 and less than or equal to 1.
attention_keep_prob  float     1.0           The keep rate for attention layer.
drop_path_keep_prob  float     1.0           The keep rate for drop path.
activation           nn.Cell   nn.GELU       Activation function stacked on top of the normalization layer (if not None), otherwise on top of the conv layer.
norm                 nn.Cell   nn.LayerNorm  Norm layer stacked on top of the convolution layer.
pool                 str       'cls'         The method of pooling.
+
Inputs:
    - x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

Outputs:
    Tensor of shape :math:`(N, 768)`

Raises:
    ValueError: If `split` is not 'train', 'test' or 'infer'.

Supported Platforms:
    ``GPU``

Examples:
+
>>> net = ViT()
+>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+>>> output = net(x)
+>>> print(output.shape)
+(1, 768)
+
+

About ViT:

+

Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train.

+

Citation:

+

.. code-block::

+
@article{2020An,
+title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+year={2020},
+}
+
+
+ Source code in mindcv/models/vit.py (lines 468-605)
class ViT(nn.Cell):
+    """
+    Vision Transformer architecture implementation.
+
+    Args:
+        image_size (int): Input image size. Default: 224.
+        input_channels (int): The number of input channel. Default: 3.
+        patch_size (int): Patch size of image. Default: 16.
+        embed_dim (int): The dimension of embedding. Default: 768.
+        num_layers (int): The depth of transformer. Default: 12.
+        num_heads (int): The number of attention heads. Default: 12.
+        mlp_dim (int): The dimension of MLP hidden layer. Default: 3072.
+        keep_prob (float): The keep rate, greater than 0 and less equal than 1. Default: 1.0.
+        attention_keep_prob (float): The keep rate for attention layer. Default: 1.0.
+        drop_path_keep_prob (float): The keep rate for drop path. Default: 1.0.
+        activation (nn.Cell): Activation function which will be stacked on top of the
+            normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.
+        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution
+            layer. Default: nn.LayerNorm.
+        pool (str): The method of pooling. Default: 'cls'.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 768)`
+
+    Raises:
+        ValueError: If `split` is not 'train', 'test' or 'infer'.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        >>> net = ViT()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 768)
+
+    About ViT:
+
+    Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image
+    patches can perform very well on image classification tasks. When pre-trained on large amounts
+    of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet,
+    CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art
+    convolutional networks while requiring substantially fewer computational resources to train.
+
+    Citation:
+
+    .. code-block::
+
+        @article{2020An,
+        title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+        author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+        year={2020},
+        }
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        input_channels: int = 3,
+        patch_size: int = 16,
+        embed_dim: int = 768,
+        num_layers: int = 12,
+        num_heads: int = 12,
+        mlp_dim: int = 3072,
+        keep_prob: float = 1.0,
+        attention_keep_prob: float = 1.0,
+        drop_path_keep_prob: float = 1.0,
+        activation: nn.Cell = nn.GELU,
+        norm: Optional[nn.Cell] = nn.LayerNorm,
+        pool: str = "cls",
+    ) -> None:
+        super().__init__()
+
+        self.patch_embedding = PatchEmbedding(image_size=image_size,
+                                              patch_size=patch_size,
+                                              embed_dim=embed_dim,
+                                              input_channels=input_channels)
+        num_patches = self.patch_embedding.num_patches
+
+        if pool == "cls":
+            self.cls_token = init(init_type=Normal(sigma=1.0),
+                                  shape=(1, 1, embed_dim),
+                                  dtype=ms.float32,
+                                  name="cls",
+                                  requires_grad=True)
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches + 1, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.concat = ops.Concat(axis=1)
+        else:
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.mean = ops.ReduceMean(keep_dims=False)
+
+        self.pool = pool
+        self.pos_dropout = Dropout(p=1.0-keep_prob)
+        self.norm = norm((embed_dim,))
+        self.tile = ops.Tile()
+        self.transformer = TransformerEncoder(
+            dim=embed_dim,
+            num_layers=num_layers,
+            num_heads=num_heads,
+            mlp_dim=mlp_dim,
+            keep_prob=keep_prob,
+            attention_keep_prob=attention_keep_prob,
+            drop_path_keep_prob=drop_path_keep_prob,
+            activation=activation,
+            norm=norm,
+        )
+
+    def construct(self, x):
+        """ViT construct."""
+        x = self.patch_embedding(x)
+
+        if self.pool == "cls":
+            cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+            x = self.concat((cls_tokens, x))
+            x += self.pos_embedding
+        else:
+            x += self.pos_embedding
+        x = self.pos_dropout(x)
+        x = self.transformer(x)
+        x = self.norm(x)
+
+        if self.pool == "cls":
+            x = x[:, 0]
+        else:
+            x = self.mean(x, (1, ))  # (1,) or (1,2)
+        return x
+
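The pool argument controls how the token sequence is reduced: "cls" (the default) prepends a class token and returns it after the final norm, while any other value averages the patch tokens with ReduceMean. A short sketch contrasting the two; the shapes assume the 224/16 defaults and are illustrative:

import numpy as np
import mindspore as ms
from mindcv.models.vit import ViT

x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)

cls_net = ViT()              # pool="cls": class token plus positional embedding over 197 tokens
mean_net = ViT(pool="mean")  # any non-"cls" value: mean over the 196 patch tokens

print(cls_net(x).shape)   # (1, 768), the class token after the final norm
print(mean_net(x).shape)  # (1, 768), averaged patch tokens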
+
+
+mindcv.models.vit.ViT.construct(x)

ViT construct.

+ Source code in mindcv/models/vit.py (lines 587-605)
def construct(self, x):
+    """ViT construct."""
+    x = self.patch_embedding(x)
+
+    if self.pool == "cls":
+        cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+        x = self.concat((cls_tokens, x))
+        x += self.pos_embedding
+    else:
+        x += self.pos_embedding
+    x = self.pos_dropout(x)
+    x = self.transformer(x)
+    x = self.norm(x)
+
+    if self.pool == "cls":
+        x = x[:, 0]
+    else:
+        x = self.mean(x, (1, ))  # (1,) or (1,2)
+    return x
+
+
+mindcv.models.vit.vit_b_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

Constructs a vit_b_16 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.
PARAMETER       TYPE   DEFAULT  DESCRIPTION
pretrained      bool   False    Whether to download and load the pre-trained model.
num_classes     int    1000     The number of classification classes.
in_channels     int    3        The number of input channels.
image_size      int    224      The input image size. Default: 224 for ImageNet.
has_logits      bool   False    Whether the model has logits or not.
drop_rate       float  0.0      The dropout rate.
drop_path_rate  float  0.0      The stochastic depth rate.
RETURNS  DESCRIPTION
ViT      ViT network, MindSpore.nn.Cell

Inputs:
    - x (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

Examples:

+
>>> net = vit_b_16_224()
+>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+>>> output = net(x)
+>>> print(output.shape)
+(1, 1000)
+
Outputs:
    Tensor of shape :math:`(N, CLASSES_{out})`

Supported Platforms:
    ``GPU``

+ Source code in mindcv/models/vit.py (lines 663-724)
@register_model
+def vit_b_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """
+    Constructs a vit_b_16 architecture from
+    `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.
+
+    Args:
+        pretrained (bool): Whether to download and load the pre-trained model. Default: False.
+        num_classes (int): The number of classification classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+        image_size (int): The input image size. Default: 224 for ImageNet.
+        has_logits (bool): Whether the model has logits or not. Default: False.
+        drop_rate (float): The dropout rate. Default: 0.0.
+        drop_path_rate (float): The stochastic depth rate. Default: 0.0.
+
+    Returns:
+        ViT network, MindSpore.nn.Cell
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Examples:
+        >>> net = vit_b_16_224()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 1000)
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`
+
+    Supported Platforms:
+        ``GPU``
+    """
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_224"]
+
+    return vit(**config)
+
+
+mindcv.models.vit.vit_b_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 727-757)
@register_model
+def vit_b_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_384"]
+
+    return vit(**config)
+
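The only differences from vit_b_16_224 above are the default image_size (384) and the checkpoint entry in default_cfgs; the backbone hyper-parameters are identical. A usage sketch; the (1, 1000) output shape assumes the same classifier behaviour shown in the vit_b_16_224 example and is illustrative:

import numpy as np
import mindspore as ms
from mindcv.models.vit import vit_b_16_384

net = vit_b_16_384(pretrained=False)
x = ms.Tensor(np.ones([1, 3, 384, 384]), ms.float32)  # 384x384 input to match image_size=384
print(net(x).shape)  # expected (1, 1000), as in the vit_b_16_224 example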
+
+mindcv.models.vit.vit_b_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 828-858)
@register_model
+def vit_b_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_224"]
+
+    return vit(**config)
+
+
+mindcv.models.vit.vit_b_32_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 861-891)
@register_model
+def vit_b_32_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention_dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention_dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_384"]
+
+    return vit(**config)
+
+
+mindcv.models.vit.vit_l_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 760-791)
@register_model
+def vit_l_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_224"]
+
+    return vit(**config)
+
+
+mindcv.models.vit.vit_l_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 794-825)
@register_model
+def vit_l_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_384"]
+
+    return vit(**config)
+
+
+mindcv.models.vit.vit_l_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ Source code in mindcv/models/vit.py (lines 894-924)
@register_model
+def vit_l_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_32_224"]
+
+    return vit(**config)
+
+
+
volo
+
+ mindcv.models.volo.VOLO
+
+ Bases: nn.Cell
+
Vision Outlooker, the main class of our model
--layers: [x,x,x,x], four blocks in two stages; the first block is an outlooker, the
          other three are transformers. We set four blocks, which are easily applied to downstream tasks.
--img_size, --in_channels, --num_classes: these three are very easy to understand
--patch_size: patch size in outlook attention
--stem_hidden_dim: hidden dim of patch embedding, 64 for d1-d4 and 128 for d5
--embed_dims, --num_heads: embedding dim and number of heads in each block
--downsamples: flags to apply downsampling or not
--outlook_attention: flags to apply outlook attention or not
--mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand
--attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand
--post_layers: post layers such as two class attention layers using [ca, ca];
               if set, return_mean=False
--return_mean: use the mean of all feature tokens for classification; if yes, no class token
--return_dense: use token labeling, details are here:
                https://github.com/zihangJiang/TokenLabeling
--mix_token: mixing tokens as in token labeling, details are here:
             https://github.com/zihangJiang/TokenLabeling
--pooling_scale: pooling_scale=2 means we downsample 2x
--out_kernel, --out_stride, --out_padding: kernel size, stride, and padding for outlook attention
+
+ Source code in mindcv/models/volo.py (lines 550-742)
class VOLO(nn.Cell):
+    """
+    Vision Outlooker, the main class of our model
+    --layers: [x,x,x,x], four blocks in two stages, the first block is outlooker, the
+              other three are transformer, we set four blocks, which are easily
+              applied to downstream tasks
+    --img_size, --in_channels, --num_classes: these three are very easy to understand
+    --patch_size: patch_size in outlook attention
+    --stem_hidden_dim: hidden dim of patch embedding, d1-d4 is 64, d5 is 128
+    --embed_dims, --num_heads: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand
+    --attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand
+    --post_layers: post layers like two class attention layers using [ca, ca],
+                  if yes, return_mean=False
+    --return_mean: use mean of all feature tokens for classification, if yes, no class token
+    --return_dense: use token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --mix_token: mixing tokens as token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --pooling_scale: pooling_scale=2 means we downsample 2x
+    --out_kernel, --out_stride, --out_padding: kernel size,
+                                               stride, and padding for outlook attention
+    """
+    def __init__(
+        self,
+        layers,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        patch_size=8,
+        stem_hidden_dim=64,
+        embed_dims=None,
+        num_heads=None,
+        downsamples=None,
+        outlook_attention=None,
+        mlp_ratios=None,
+        qkv_bias=False,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=nn.LayerNorm,
+        post_layers=None,
+        return_mean=False,
+        return_dense=True,
+        mix_token=True,
+        pooling_scale=2,
+        out_kernel=3,
+        out_stride=2,
+        out_padding=1,
+    ) -> None:
+
+        super().__init__()
+        self.num_classes = num_classes
+        self.patch_embed = PatchEmbed(stem_conv=True, stem_stride=2, patch_size=patch_size,
+                                      in_channels=in_channels, hidden_dim=stem_hidden_dim,
+                                      embed_dim=embed_dims[0])
+        # initial positional encoding, we add positional encoding after outlooker blocks
+        self.pos_embed = Parameter(
+            ops.zeros((1, img_size // patch_size // pooling_scale,
+                      img_size // patch_size // pooling_scale,
+                      embed_dims[-1]), mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            if outlook_attention[i]:
+                # stage 1
+                stage = outlooker_blocks(Outlooker, i, embed_dims[i], layers,
+                                         downsample=downsamples[i], num_heads=num_heads[i],
+                                         kernel_size=out_kernel, stride=out_stride,
+                                         padding=out_padding, mlp_ratio=mlp_ratios[i],
+                                         qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                         attn_drop=attn_drop_rate, norm_layer=norm_layer)
+                network.append(stage)
+            else:
+                # stage 2
+                stage = transformer_blocks(Transformer, i, embed_dims[i], layers,
+                                           num_heads[i], mlp_ratio=mlp_ratios[i],
+                                           qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                           drop_path_rate=drop_path_rate,
+                                           attn_drop=attn_drop_rate,
+                                           norm_layer=norm_layer)
+                network.append(stage)
+
+            if downsamples[i]:
+                # downsampling between two stages
+                network.append(Downsample(embed_dims[i], embed_dims[i + 1], 2))
+
+        self.network = nn.CellList(network)
+
+        # set post block, for example, class attention layers
+        self.post_network = None
+        if post_layers is not None:
+            self.post_network = nn.CellList([
+                get_block(post_layers[i],
+                          dim=embed_dims[-1],
+                          num_heads=num_heads[-1],
+                          mlp_ratio=mlp_ratios[-1],
+                          qkv_bias=qkv_bias,
+                          qk_scale=qk_scale,
+                          attn_drop=attn_drop_rate,
+                          drop_path=0.0,
+                          norm_layer=norm_layer)
+                for i in range(len(post_layers))
+            ])
+            self.cls_token = Parameter(ops.zeros((1, 1, embed_dims[-1]), mstype.float32))
+            self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.cls_token.data.shape))
+
+        # set output type
+        self.return_mean = return_mean  # if yes, return mean, not use class token
+        self.return_dense = return_dense  # if yes, return class token and all feature tokens
+        if return_dense:
+            assert not return_mean, "cannot return both mean and dense"
+        self.mix_token = mix_token
+        self.pooling_scale = pooling_scale
+        if mix_token:  # enable token mixing, see token labeling for details.
+            self.beta = 1.0
+            assert return_dense, "return all tokens if mix_token is enabled"
+        if return_dense:
+            self.aux_head = nn.Dense(
+                embed_dims[-1],
+                num_classes) if num_classes > 0 else Identity()
+        self.norm = norm_layer([embed_dims[-1]])
+
+        # Classifier head
+        self.head = nn.Dense(
+            embed_dims[-1], num_classes) if num_classes > 0 else Identity()
+
+        self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.pos_embed.data.shape))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.TruncatedNormal(sigma=.02), m.weight.data.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+
+    def forward_embeddings(self, x: Tensor) -> Tensor:
+        # patch embedding
+        x = self.patch_embed(x)
+        # B,C,H,W-> B,H,W,C
+        x = ops.transpose(x, (0, 2, 3, 1))
+        return x
+
+    def forward_tokens(self, x: Tensor) -> Tensor:
+        for idx, block in enumerate(self.network):
+            if idx == 2:  # add positional encoding after outlooker blocks
+                x = x + self.pos_embed
+                x = self.pos_drop(x)
+            x = block(x)
+
+        B, H, W, C = x.shape
+        x = ops.reshape(x, (B, -1, C))
+        return x
+
+    def forward_cls(self, x: Tensor) -> Tensor:
+        # B, N, C = x.shape
+        cls_tokens = ops.broadcast_to(self.cls_token, (x.shape[0], -1, -1))
+        x = ops.Cast()(x, cls_tokens.dtype)
+        x = ops.concat([cls_tokens, x], 1)
+        for block in self.post_network:
+            x = block(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        # step1: patch embedding
+        x = self.forward_embeddings(x)
+
+        # step2: tokens learning in the two stages
+        x = self.forward_tokens(x)
+
+        # step3: post network, apply class attention or not
+        if self.post_network is not None:
+            x = self.forward_cls(x)
+        x = self.norm(x)
+
+        if self.return_mean:  # if no class token, return mean
+            return self.head(ops.mean(x, 1))
+
+        x_cls = self.head(x[:, 0])
+        if not self.return_dense:
+            return x_cls
+
+        return x_cls
+
+
+mindcv.models.volo.volo_d1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D1 model, Params: 27M
--layers: [x,x,x,x], four blocks in two stages; the first stage (block) is an outlooker,
          the other three blocks are transformers. We set four blocks, which are easily
          applied to downstream tasks.
--embed_dims, --num_heads: embedding dim and number of heads in each block
--downsamples: flags to apply downsampling or not in the four blocks
--outlook_attention: flags to apply outlook attention or not
--mlp_ratios: mlp ratio in the four blocks
--post_layers: post layers like two class attention layers using [ca, ca]
See the class VOLO() for details on all args.

+ Source code in mindcv/models/volo.py (lines 745-776)
@register_model
+def volo_d1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D1 model, Params: 27M
+    --layers: [x,x,x,x], four blocks in two stages, the first stage(block) is outlooker,
+            the other three blocks are transformer, we set four blocks, which are easily
+             applied to downstream tasks
+    --embed_dims, --num_heads: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not in four blocks
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios: mlp ratio in four blocks
+    --post_layers: post layers like two class attention layers using [ca, ca]
+    See detail for all args in the class VOLO()
+    """
+    default_cfg = default_cfgs['volo_d1']
+
+    # first block is outlooker (stage1), the other three are transformer (stage2)
+    model = VOLO(layers=[4, 4, 8, 2],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[192, 384, 384, 384],
+                 num_heads=[6, 12, 12, 12],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
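A usage sketch for the D1 configuration; with the VOLO class defaults above (return_dense=True, return_mean=False) the classifier output is the class-token logits from the 'ca' post layers, and the input tensor here is illustrative:

import numpy as np
import mindspore as ms
from mindcv.models.volo import volo_d1

net = volo_d1(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
logits = net(x)      # class-token logits after the class-attention post layers
print(logits.shape)  # expected (1, 1000)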
+
+mindcv.models.volo.volo_d2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D2 model, Params: 59M

+ Source code in mindcv/models/volo.py (lines 779-799)
@register_model
+def volo_d2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D2 model, Params: 59M
+    """
+    default_cfg = default_cfgs['volo_d2']
+    model = VOLO(layers=[6, 4, 10, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+mindcv.models.volo.volo_d3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D3 model, Params: 86M

+ Source code in mindcv/models/volo.py (lines 802-822)
@register_model
+def volo_d3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D3 model, Params: 86M
+    """
+    default_cfg = default_cfgs['volo_d3']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D4 model, Params: 193M

+ +
+ Source code in mindcv/models/volo.py (lines 825-845) +
@register_model
+def volo_d4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D4 model, Params: 193M
+    """
+    default_cfg = default_cfgs['volo_d4']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D5 model, Params: 296M. stem_hidden_dim=128: the dim in patch embedding is 128 for VOLO-D5.

+ +
+ Source code in mindcv/models/volo.py (lines 848-868) +
@register_model
+def volo_d5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D5 model, Params: 296M
+    stem_hidden_dim=128, the dim in patch embedding is 128 for VOLO-D5
+    """
+    default_cfg = default_cfgs['volo_d5']
+    model = VOLO(layers=[12, 12, 20, 4],
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[4, 4, 4, 4],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 stem_hidden_dim=128,
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
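For reference, a minimal usage sketch of the registered VOLO factories documented above (it assumes MindSpore and MindCV are installed; `volo_d1` and `volo_d2` are the functions listed on this page):

```python
from mindcv.models.volo import volo_d1, volo_d2

# VOLO-D1: the first stage uses outlook attention, the other three use transformer blocks
model_d1 = volo_d1(pretrained=False, num_classes=1000, in_channels=3)

# VOLO-D2 (~59M params) with a custom classification head
model_d2 = volo_d2(pretrained=False, num_classes=10)
```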

xcit

+ + +
+ + + +

+ mindcv.models.xcit.XCiT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

XCiT model class, based on "XCiT: Cross-Covariance Image Transformers" (https://arxiv.org/abs/2106.09681).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
img_size +
+

input image size

+
+

+ + TYPE: + (int, tuple) + + + DEFAULT: + 224 + +

+
patch_size +
+

patch size

+
+

+ + TYPE: + (int, tuple) + + + DEFAULT: + 16 + +

+
in_chans +
+

number of input channels

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

number of classes for classification head

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

embedding dimension

+
+

+ + TYPE: + int + + + DEFAULT: + 768 + +

+
depth +
+

depth of transformer

+
+

+ + TYPE: + int + + + DEFAULT: + 12 + +

+
num_heads +
+

number of attention heads

+
+

+ + TYPE: + int + + + DEFAULT: + 12 + +

+
mlp_ratio +
+

ratio of mlp hidden dim to embedding dim

+
+

+ + TYPE: + int + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

enable bias for qkv if True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
qk_scale +
+

override default qk scale of head_dim ** -0.5 if set

+
+

+ + TYPE: + float + + + DEFAULT: + None + +

+
drop_rate +
+

dropout rate

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

attention dropout rate

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

stochastic depth rate

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
norm_layer +
+

(nn.Module): normalization layer

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
cls_attn_layers +
+

(int) Depth of Class attention layers

+
+

+ + TYPE: + int + + + DEFAULT: + 2 + +

+
use_pos +
+

(bool) whether to use positional encoding

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
eta +
+

(float) layerscale initialization value

+
+

+ + TYPE: + float + + + DEFAULT: + None + +

+
tokens_norm +
+

(bool) Whether to normalize all tokens or just the cls_token in the CA

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/xcit.py (lines 354-475) +
class XCiT(nn.Cell):
+    r"""XCiT model class, based on
+    `"XCiT: Cross-Covariance Image Transformers" <https://arxiv.org/abs/2106.09681>`_
+    Args:
+        img_size (int, tuple): input image size
+        patch_size (int, tuple): patch size
+        in_chans (int): number of input channels
+        num_classes (int): number of classes for classification head
+        embed_dim (int): embedding dimension
+        depth (int): depth of transformer
+        num_heads (int): number of attention heads
+        mlp_ratio (int): ratio of mlp hidden dim to embedding dim
+        qkv_bias (bool): enable bias for qkv if True
+        qk_scale (float): override default qk scale of head_dim ** -0.5 if set
+        drop_rate (float): dropout rate
+        attn_drop_rate (float): attention dropout rate
+        drop_path_rate (float): stochastic depth rate
+        norm_layer: (nn.Module): normalization layer
+        cls_attn_layers: (int) Depth of Class attention layers
+        use_pos: (bool) whether to use positional encoding
+        eta: (float) layerscale initialization value
+        tokens_norm: (bool) Whether to normalize all tokens or just the cls_token in the CA
+    """
+
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_chans: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: int = 4.,
+                 qkv_bias: bool = True,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = None,
+                 cls_attn_layers: int = 2,
+                 use_pos: bool = True,
+                 patch_proj: str = 'linear',
+                 eta: float = None,
+                 tokens_norm: bool = False):
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        norm_layer = norm_layer or partial(nn.LayerNorm, epsilon=1e-6)
+
+        self.patch_embed = ConvPatchEmbed(img_size=img_size, embed_dim=embed_dim,
+                                          patch_size=patch_size)
+
+        num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(
+            ops.zeros((1, 1, embed_dim), mstype.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+        self.blocks = nn.CellList([
+            XCABlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                norm_layer=norm_layer, num_tokens=num_patches, eta=eta)
+            for i in range(depth)])
+
+        self.cls_attn_blocks = nn.CellList([
+            ClassAttentionBlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer,
+                eta=eta, tokens_norm=tokens_norm)
+            for i in range(cls_attn_layers)])
+        self.norm = norm_layer([embed_dim])
+        self.head = nn.Dense(
+            in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else ops.Identity()
+
+        self.pos_embeder = PositionalEncodingFourier(dim=embed_dim)
+        self.use_pos = use_pos
+
+        # Classifier head
+        self.cls_token.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                        self.cls_token.shape,
+                                                        self.cls_token.dtype))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = weight_init.initializer(weight_init.TruncatedNormal(
+                    sigma=0.02), m.weight.shape, mindspore.float32)
+                if m.bias is not None:
+                    m.bias.set_data(weight_init.initializer(
+                        weight_init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(weight_init.initializer(
+                    weight_init.Constant(0), m.beta.shape))
+                m.gamma.set_data(weight_init.initializer(
+                    weight_init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x):
+        B, C, H, W = x.shape
+        x, (Hp, Wp) = self.patch_embed(x)
+        if self.use_pos:
+            pos_encoding = self.pos_embeder(B, Hp, Wp).reshape(
+                B, -1, x.shape[1]).transpose(0, 2, 1)
+            x = x + pos_encoding
+        x = self.pos_drop(x)
+        for blk in self.blocks:
+            x = blk(x, Hp, Wp)
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+        cls_tokens = ops.cast(cls_tokens, x.dtype)
+        x = ops.concat((cls_tokens, x), 1)
+
+        for blk in self.cls_attn_blocks:
+            x = blk(x, Hp, Wp)
+        return self.norm(x)[:, 0]
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.xcit.xcit_tiny_12_p16_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get xcit_tiny_12_p16_224 model. +Refer to the base class 'models.XCiT' for more details.

+ +
+ Source code in mindcv/models/xcit.py (lines 478-491) +
@register_model
+def xcit_tiny_12_p16_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> XCiT:
+    """Get xcit_tiny_12_p16_224 model.
+    Refer to the base class 'models.XCiT' for more details.
+    """
+    default_cfg = default_cfgs['xcit_tiny_12_p16_224']
+    model = XCiT(
+        patch_size=16, num_classes=num_classes, embed_dim=192, depth=12, num_heads=4, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), eta=1.0, tokens_norm=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg,
+                        num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
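A minimal usage sketch of the factory above. The dummy-input forward pass and the expected output shape are assumptions based on the default img_size=224 and num_classes=1000 shown in the XCiT class documentation:

```python
import numpy as np
import mindspore as ms
from mindcv.models.xcit import xcit_tiny_12_p16_224

net = xcit_tiny_12_p16_224(pretrained=False, num_classes=1000)
net.set_train(False)

dummy = ms.Tensor(np.zeros((1, 3, 224, 224)), ms.float32)  # NCHW input at the default img_size
logits = net(dummy)  # expected shape: (1, 1000)
```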
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/reference/optim/index.html b/en/reference/optim/index.html new file mode 100644 index 000000000..0c00bdc89 --- /dev/null +++ b/en/reference/optim/index.html @@ -0,0 +1,2587 @@ + + + + + + + + + + + + + + + + + + + + + + + + optim - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Optimizer

+

Optimizer Factory

+ + + +
+ + + +

+mindcv.optim.optim_factory.create_optimizer(params, opt='adam', lr=0.001, weight_decay=0, momentum=0.9, nesterov=False, filter_bias_and_bn=True, loss_scale=1.0, schedule_decay=0.004, checkpoint_path='', eps=1e-10, **kwargs) + +

+ + +
+ +

Creates optimizer by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
params +
+

network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters +or list of dicts. When the list element is a dictionary, the key of the dictionary can be +"params", "lr", "weight_decay","grad_centralization" and "order_params".

+
+

+

+
opt +
+

wrapped optimizer. Options include 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion', 'rmsprop', 'adagrad' and 'lamb'. 'adam' is the default choice for convolution-based networks, while 'adamw' is recommended for ViT-based networks. Default: 'adam'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'adam' + +

+
lr +
+

learning rate: float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3.

+
+

+ + TYPE: + Optional[float] + + + DEFAULT: + 0.001 + +

+
weight_decay +
+

weight decay factor. It should be noted that weight decay can be a constant value or a Cell. +It is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to +dynamic learning rate, users need to customize a weight decay schedule only with global step as input, +and during training, the optimizer calls the instance of WeightDecaySchedule to get the weight decay value +of current step. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0 + +

+
momentum +
+

momentum if the optimizer supports. Default: 0.9.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.9 + +

+
nesterov +
+

Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
filter_bias_and_bn +
+

whether to filter batch norm parameters and bias from weight decay. +If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
loss_scale +
+

A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Optimizer object

+
+
+ +
+ Source code in mindcv/optim/optim_factory.py (lines 31-176) +
def create_optimizer(
+    params,
+    opt: str = "adam",
+    lr: Optional[float] = 1e-3,
+    weight_decay: float = 0,
+    momentum: float = 0.9,
+    nesterov: bool = False,
+    filter_bias_and_bn: bool = True,
+    loss_scale: float = 1.0,
+    schedule_decay: float = 4e-3,
+    checkpoint_path: str = "",
+    eps: float = 1e-10,
+    **kwargs,
+):
+    r"""Creates optimizer by name.
+
+    Args:
+        params: network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters
+            or list of dicts. When the list element is a dictionary, the key of the dictionary can be
+            "params", "lr", "weight_decay","grad_centralization" and "order_params".
+        opt: wrapped optimizer. Options include 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion',
+            'rmsprop', 'adagrad' and 'lamb'. 'adam' is the default choice for convolution-based networks.
+            'adamw' is recommended for ViT-based networks. Default: 'adam'.
+        lr: learning rate: float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3.
+        weight_decay: weight decay factor. It should be noted that weight decay can be a constant value or a Cell.
+            It is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to
+            dynamic learning rate, users need to customize a weight decay schedule only with global step as input,
+            and during training, the optimizer calls the instance of WeightDecaySchedule to get the weight decay value
+            of current step. Default: 0.
+        momentum: momentum if the optimizer supports. Default: 0.9.
+        nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.
+        filter_bias_and_bn: whether to filter batch norm parameters and bias from weight decay.
+            If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True.
+        loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0.
+
+    Returns:
+        Optimizer object
+    """
+
+    opt = opt.lower()
+
+    if weight_decay and filter_bias_and_bn:
+        params = init_group_params(params, weight_decay)
+
+    opt_args = dict(**kwargs)
+    # if lr is not None:
+    #    opt_args.setdefault('lr', lr)
+
+    # non-adaptive: SGD, momentum, and nesterov
+    if opt == "sgd":
+        # note: nn.Momentum may perform better if momentum > 0.
+        optimizer = nn.SGD(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            nesterov=nesterov,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt in ["momentum", "nesterov"]:
+        optimizer = nn.Momentum(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            use_nesterov=nesterov,
+            loss_scale=loss_scale,
+        )
+    # adaptive
+    elif opt == "adam":
+        optimizer = nn.Adam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            use_nesterov=nesterov,
+            **opt_args,
+        )
+    elif opt == "adamw":
+        optimizer = AdamW(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lion":
+        optimizer = Lion(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "nadam":
+        optimizer = NAdam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            schedule_decay=schedule_decay,
+            **opt_args,
+        )
+    elif opt == "adan":
+        optimizer = Adan(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "rmsprop":
+        optimizer = nn.RMSProp(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            epsilon=eps,
+            **opt_args,
+        )
+    elif opt == "adagrad":
+        optimizer = nn.Adagrad(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lamb":
+        assert loss_scale == 1.0, "Loss scaler is not supported by Lamb optimizer"
+        optimizer = nn.Lamb(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            **opt_args,
+        )
+    else:
+        raise ValueError(f"Invalid optimizer: {opt}")
+
+    if os.path.exists(checkpoint_path):
+        param_dict = load_checkpoint(checkpoint_path)
+        load_param_into_net(optimizer, param_dict)
+
+    return optimizer
+
+
+
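As a usage sketch of the factory above (the toy nn.Dense network is only a stand-in for a real model):

```python
import mindspore.nn as nn
from mindcv.optim.optim_factory import create_optimizer

net = nn.Dense(16, 4)  # toy stand-in network

# AdamW is recommended for ViT-style networks; bias and BN parameters are excluded
# from weight decay because filter_bias_and_bn defaults to True.
optimizer = create_optimizer(
    net.trainable_params(),
    opt="adamw",
    lr=1e-3,
    weight_decay=0.05,
)
```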
+ +

AdamW

+ + +
+ + + +

+ mindcv.optim.adamw.AdamW + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implements the AdamWeightDecay optimizer with optional gradient clipping by norm.

+ +
+ Source code in mindcv/optim/adamw.py (lines 126-206) +
class AdamW(Optimizer):
+    """
+    Implements the AdamWeightDecay optimizer with optional gradient clipping by norm.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="adam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="adam_v", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(
+                    _adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr, self.weight_decay
+                ),
+                self.parameters,
+                self.moments1,
+                self.moments2,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
+
+
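A direct-construction sketch of the class above; setting clip=True enables the gradient clipping by global norm (5.0) applied in construct(). The toy network is only illustrative:

```python
import mindspore.nn as nn
from mindcv.optim.adamw import AdamW

net = nn.Dense(16, 4)  # toy stand-in network
opt = AdamW(net.trainable_params(), learning_rate=1e-3, weight_decay=0.05, clip=True)
```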
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Adan

+ + +
+ + + +

+ mindcv.optim.adan.Adan + + +

+ + +
+

+ Bases: Optimizer

+ + +

The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677

+

Note: it is an experimental version.

+ +
+ Source code in mindcv/optim/adan.py (lines 107-178) +
class Adan(Optimizer):
+    """
+    The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677
+
+    Note: it is an experimental version.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.98,
+        beta2=0.92,
+        beta3=0.99,
+        eps=1e-8,
+        use_locking=False,
+        weight_decay=0.0,
+        loss_scale=1.0,
+    ):
+        super().__init__(
+            learning_rate, params, weight_decay=weight_decay, loss_scale=loss_scale
+        )  # The inherited Optimizer weight decay is blocked; weight decay is computed in this file instead.
+
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        assert isinstance(use_locking, bool), f"For {self.cls_name}, use_locking should be bool"
+
+        self.beta1 = Tensor(beta1, mstype.float32)
+        self.beta2 = Tensor(beta2, mstype.float32)
+        self.beta3 = Tensor(beta3, mstype.float32)
+
+        self.eps = Tensor(eps, mstype.float32)
+        self.use_locking = use_locking
+        self.moment1 = self._parameters.clone(prefix="moment1", init="zeros")  # m
+        self.moment2 = self._parameters.clone(prefix="moment2", init="zeros")  # v
+        self.moment3 = self._parameters.clone(prefix="moment3", init="zeros")  # n
+        self.prev_gradient = self._parameters.clone(prefix="prev_gradient", init="zeros")
+
+        self.weight_decay = Tensor(weight_decay, mstype.float32)
+
+    def construct(self, gradients):
+        params = self._parameters
+        moment1 = self.moment1
+        moment2 = self.moment2
+        moment3 = self.moment3
+
+        gradients = self.flatten_gradients(gradients)
+        gradients = self.gradients_centralization(gradients)
+        gradients = self.scale_grad(gradients)
+        gradients = self._grad_sparse_indices_deduplicate(gradients)
+        lr = self.get_lr()
+
+        # TODO: currently not support dist
+        success = self.map_(
+            ops.partial(_adan_opt, self.beta1, self.beta2, self.beta3, self.eps, lr, self.weight_decay),
+            params,
+            moment1,
+            moment2,
+            moment3,
+            gradients,
+            self.prev_gradient,
+        )
+
+        return success
+
+    @Optimizer.target.setter
+    def target(self, value):
+        """
+        If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+        optimizer operation.
+        """
+        self._set_base_target(value)
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.optim.adan.Adan.target(value) + +

+ + +
+ +

If the input value is set to "CPU", the parameters will be updated on the host using the Fused +optimizer operation.

+ +
+ Source code in mindcv/optim/adan.py (lines 172-178) +
@Optimizer.target.setter
+def target(self, value):
+    """
+    If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+    optimizer operation.
+    """
+    self._set_base_target(value)
+
+
+
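Since Adan is registered in the optimizer factory (see the 'adan' branch of create_optimizer above), the usual way to try it is through create_optimizer. A sketch, assuming the same toy setup as in the earlier examples:

```python
import mindspore.nn as nn
from mindcv.optim.optim_factory import create_optimizer

net = nn.Dense(16, 4)  # toy stand-in network
# experimental optimizer; betas default to (0.98, 0.92, 0.99) as in the class above
opt = create_optimizer(net.trainable_params(), opt="adan", lr=1e-3, weight_decay=0.02)
```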
+ +
+ + + +
+ +
+ +

Lion

+ + +
+ + + +

+ mindcv.optim.lion.Lion + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implementation of the Lion optimizer from the paper 'https://arxiv.org/abs/2302.06675'. Additionally, this implementation supports gradient clipping.

+

Notes: the lr is usually 3-10x smaller than that of AdamW, and the weight decay is usually 3-10x larger than that of AdamW.

+ +
+ Source code in mindcv/optim/lion.py (lines 112-189) +
class Lion(Optimizer):
+    """
+    Implementation of the Lion optimizer from the paper 'https://arxiv.org/abs/2302.06675'.
+    Additionally, this implementation supports gradient clipping.
+
+    Notes:
+    lr is usually 3-10x smaller than adamw.
+    weight decay is usually 3-10x larger than adamw.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-4,
+        beta1=0.9,
+        beta2=0.99,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="lion_m", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr, self.weight_decay),
+                self.parameters,
+                self.moments1,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
+
+
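Following the notes above (lr roughly 3-10x smaller and weight decay roughly 3-10x larger than AdamW), a hypothetical setting derived from an AdamW baseline of lr=1e-3 / weight_decay=0.05 might look like this; the concrete values are illustrative assumptions, not tuned recommendations:

```python
import mindspore.nn as nn
from mindcv.optim.optim_factory import create_optimizer

net = nn.Dense(16, 4)  # toy stand-in network
# ~5x smaller lr and ~5x larger weight decay than the AdamW baseline
opt = create_optimizer(net.trainable_params(), opt="lion", lr=2e-4, weight_decay=0.25)
```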
+ + + +
+ + + + + + + + + + + +
+ +
+ +

NAdam

+ + +
+ + + +

+ mindcv.optim.nadam.NAdam + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).

+ +
+ Source code in mindcv/optim/nadam.py (lines 23-84) +
class NAdam(Optimizer):
+    """
+    Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        schedule_decay=4e-3,
+    ):
+        super().__init__(learning_rate, params, weight_decay, loss_scale)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="nadam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="nadam_v", init="zeros")
+        self.schedule_decay = Tensor(np.array([schedule_decay]).astype(np.float32))
+        self.mu_schedule = Parameter(initializer(1, [1], ms.float32), name="mu_schedule")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        params = self.parameters
+        step = self.global_step + _scaler_one
+        gradients = self.decay_weight(gradients)
+        mu = self.beta1 * (
+            _scaler_one - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), step * self.schedule_decay)
+        )
+        mu_next = self.beta1 * (
+            _scaler_one
+            - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), (step + _scaler_one) * self.schedule_decay)
+        )
+        mu_schedule = self.mu_schedule * mu
+        mu_schedule_next = self.mu_schedule * mu * mu_next
+        self.mu_schedule = mu_schedule
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        num_params = len(params)
+        for i in range(num_params):
+            ops.assign(self.moments1[i], self.beta1 * self.moments1[i] + (_scaler_one - self.beta1) * gradients[i])
+            ops.assign(
+                self.moments2[i], self.beta2 * self.moments2[i] + (_scaler_one - self.beta2) * ops.square(gradients[i])
+            )
+
+            regulate_m = mu_next * self.moments1[i] / (_scaler_one - mu_schedule_next) + (_scaler_one - mu) * gradients[
+                i
+            ] / (_scaler_one - mu_schedule)
+            regulate_v = self.moments2[i] / (_scaler_one - beta2_power)
+
+            update = params[i] - lr * regulate_m / (self.eps + ops.sqrt(regulate_v))
+            ops.assign(params[i], update)
+
+        return params
+
+
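NAdam can be constructed directly with the defaults shown above (or through create_optimizer with opt="nadam", which forwards schedule_decay). A minimal sketch with a toy network:

```python
import mindspore.nn as nn
from mindcv.optim.nadam import NAdam

net = nn.Dense(16, 4)  # toy stand-in network
opt = NAdam(net.trainable_params(), learning_rate=2e-3, schedule_decay=4e-3)
```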
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/reference/scheduler/index.html b/en/reference/scheduler/index.html new file mode 100644 index 000000000..3941bd3b3 --- /dev/null +++ b/en/reference/scheduler/index.html @@ -0,0 +1,2599 @@ + + + + + + + + + + + + + + + + + + + + + + + + scheduler - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Learning Rate Scheduler

+

Scheduler Factory

+ + + +
+ + + +

+mindcv.scheduler.scheduler_factory.create_scheduler(steps_per_epoch, scheduler='constant', lr=0.01, min_lr=1e-06, warmup_epochs=3, warmup_factor=0.0, decay_epochs=10, decay_rate=0.9, milestones=None, num_epochs=200, num_cycles=1, cycle_decay=1.0, lr_epoch_stair=False) + +

+ + +
+ +

Creates learning rate scheduler by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
steps_per_epoch +
+

number of steps per epoch.

+
+

+ + TYPE: + int + +

+
scheduler +
+

scheduler name like 'constant', 'cosine_decay', 'step_decay', +'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'constant' + +

+
lr +
+

learning rate value. Default: 0.01.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.01 + +

+
min_lr +
+

lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.

+
+

+ + TYPE: + float + + + DEFAULT: + 1e-06 + +

+
warmup_epochs +
+

epochs to warmup LR, if scheduler supports. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
warmup_factor +
+

the warmup phase of the scheduler is a linearly increasing lr; the beginning factor is warmup_factor, i.e., the lr of the first step/epoch is lr*warmup_factor, and the ending lr of the warmup phase is lr. Default: 0.0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
decay_epochs +
+

for 'cosine_decay' schedulers, decay LR to min_lr in decay_epochs. +For 'step_decay' scheduler, decay LR by a factor of decay_rate every decay_epochs. Default: 10.

+
+

+ + TYPE: + int + + + DEFAULT: + 10 + +

+
decay_rate +
+

LR decay rate. Default: 0.9.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.9 + +

+
milestones +
+

list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None

+
+

+ + TYPE: + list + + + DEFAULT: + None + +

+
num_epochs +
+

Number of total epochs. Default: 200.

+
+

+ + TYPE: + int + + + DEFAULT: + 200 + +

+
num_cycles +
+

Number of cycles for cosine decay and cyclic. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
cycle_decay +
+

Decay rate of lr max in each cosine cycle. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
lr_epoch_stair +
+

If True, LR will be updated at the beginning of each new epoch and the LR will be consistent for each batch in one epoch. Otherwise, the learning rate will be updated dynamically in each step. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Cell object for computing LR with input of current global steps

+
+
+ +
+ Source code in mindcv/scheduler/scheduler_factory.py (lines 24-153) +
def create_scheduler(
+    steps_per_epoch: int,
+    scheduler: str = "constant",
+    lr: float = 0.01,
+    min_lr: float = 1e-6,
+    warmup_epochs: int = 3,
+    warmup_factor: float = 0.0,
+    decay_epochs: int = 10,
+    decay_rate: float = 0.9,
+    milestones: list = None,
+    num_epochs: int = 200,
+    num_cycles: int = 1,
+    cycle_decay: float = 1.0,
+    lr_epoch_stair: bool = False,
+):
+    r"""Creates learning rate scheduler by name.
+
+    Args:
+        steps_per_epoch: number of steps per epoch.
+        scheduler: scheduler name like 'constant', 'cosine_decay', 'step_decay',
+            'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.
+        lr: learning rate value. Default: 0.01.
+        min_lr: lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.
+        warmup_epochs: epochs to warmup LR, if scheduler supports. Default: 3.
+        warmup_factor: the warmup phase of scheduler is a linearly increasing lr,
+            the beginning factor is `warmup_factor`, i.e., the lr of the first step/epoch is lr*warmup_factor,
+            and the ending lr in the warmup phase is lr. Default: 0.0
+        decay_epochs: for 'cosine_decay' schedulers, decay LR to min_lr in `decay_epochs`.
+            For 'step_decay' scheduler, decay LR by a factor of `decay_rate` every `decay_epochs`. Default: 10.
+        decay_rate: LR decay rate. Default: 0.9.
+        milestones: list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None
+        num_epochs: Number of total epochs. Default: 200.
+        num_cycles: Number of cycles for cosine decay and cyclic. Default: 1.
+        cycle_decay: Decay rate of lr max in each cosine cycle. Default: 1.0.
+        lr_epoch_stair: If True, LR will be updated in the beginning of each new epoch
+            and the LR will be consistent for each batch in one epoch.
+            Otherwise, learning rate will be updated dynamically in each step. Default: False.
+    Returns:
+        Cell object for computing LR with input of current global steps
+    """
+    # check params
+    if milestones is None:
+        milestones = []
+
+    if warmup_epochs + decay_epochs > num_epochs:
+        _logger.warning("warmup_epochs + decay_epochs > num_epochs. Please check and reduce decay_epochs!")
+
+    # lr warmup phase
+    warmup_lr_scheduler = []
+    if warmup_epochs > 0:
+        if warmup_factor == 0 and lr_epoch_stair:
+            _logger.warning(
+                "The warmup factor is set to 0, lr of 0-th epoch is always zero! " "Recommend value is 0.01."
+            )
+        warmup_func = linear_lr if lr_epoch_stair else linear_refined_lr
+        warmup_lr_scheduler = warmup_func(
+            start_factor=warmup_factor,
+            end_factor=1.0,
+            total_iters=warmup_epochs,
+            lr=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=warmup_epochs,
+        )
+
+    # lr decay phase
+    main_epochs = num_epochs - warmup_epochs
+    if scheduler in ["cosine_decay", "warmup_cosine_decay"]:
+        cosine_func = cosine_decay_lr if lr_epoch_stair else cosine_decay_refined_lr
+        main_lr_scheduler = cosine_func(
+            decay_epochs=decay_epochs,
+            eta_min=min_lr,
+            eta_max=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+            num_cycles=num_cycles,
+            cycle_decay=cycle_decay,
+        )
+    elif scheduler == "one_cycle":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError(
+                "OneCycle scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0."
+            )
+        div_factor = 25.0
+        initial_lr = lr / div_factor
+        final_div_factor = initial_lr / min_lr
+        main_lr_scheduler = one_cycle_lr(
+            max_lr=lr,
+            final_div_factor=final_div_factor,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "cyclic":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError("Cyclic scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0.")
+        num_steps = steps_per_epoch * main_epochs
+        step_size_up = int(num_steps / num_cycles / 2)
+        main_lr_scheduler = cyclic_lr(
+            base_lr=min_lr,
+            max_lr=lr,
+            step_size_up=step_size_up,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "exponential_decay":
+        exponential_func = exponential_lr if lr_epoch_stair else exponential_refined_lr
+        main_lr_scheduler = exponential_func(
+            gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "polynomial_decay":
+        polynomial_func = polynomial_lr if lr_epoch_stair else polynomial_refined_lr
+        main_lr_scheduler = polynomial_func(
+            total_iters=main_epochs, power=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "step_decay":
+        main_lr_scheduler = step_lr(
+            step_size=decay_epochs, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "multi_step_decay":
+        main_lr_scheduler = multi_step_lr(
+            milestones=milestones, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "constant":
+        main_lr_scheduler = [lr for _ in range(steps_per_epoch * main_epochs)]
+    else:
+        raise ValueError(f"Invalid scheduler: {scheduler}")
+
+    # combine
+    lr_scheduler = warmup_lr_scheduler + main_lr_scheduler
+
+    return lr_scheduler
+
+
+
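A usage sketch of the factory above. As the implementation shows, the returned schedule is a per-step list of learning-rate values (warmup steps followed by the main phase), which can then be passed as the lr argument of create_optimizer. The concrete numbers below are illustrative only:

```python
from mindcv.scheduler.scheduler_factory import create_scheduler

lr_schedule = create_scheduler(
    steps_per_epoch=100,
    scheduler="cosine_decay",
    lr=0.01,
    min_lr=1e-6,
    warmup_epochs=3,
    decay_epochs=27,
    num_epochs=30,
)
# one value per training step: 3 warmup epochs + 27 cosine-decay epochs
print(len(lr_schedule))  # 3000 for the settings above
```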
+ +
+ +
+ + + +

+ mindcv.scheduler.dynamic_lr + + +

+ +
+ +

Meta learning rate scheduler.

+

This module implements exactly the same learning rate schedulers as native PyTorch; see "torch.optim.lr_scheduler" (https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate). At present, only constant_lr, linear_lr, polynomial_lr, exponential_lr, step_lr, multi_step_lr, cosine_annealing_lr, cosine_annealing_warm_restarts_lr, one_cycle_lr and cyclic_lr are implemented. The number, names and usage of the positional arguments are exactly the same as those of native PyTorch.

+

However, because these functions must explicitly return the learning rate at each step, we introduce three additional keyword arguments, namely lr, steps_per_epoch and epochs: lr is the base learning rate that would be passed when creating the optimizer in torch, steps_per_epoch is the number of steps (iterations) per epoch, and epochs is the total number of epochs; together with steps_per_epoch it determines the length of the returned list of learning rates.

+

Among all the schedulers, one_cycle_lr and cyclic_lr need only the two keyword arguments other than lr, since when creating the optimizer in torch, the lr argument has no effect when either of these two schedulers is used.

+

Most schedulers in PyTorch are coarse-grained: the learning rate is constant within a single epoch. For non-stepwise schedulers, we therefore introduce fine-grained variants in which the learning rate also changes within a single epoch. The names of these variants contain the refined keyword; the implemented fine-grained variants include linear_refined_lr, polynomial_refined_lr, etc. A small sketch of the keyword arguments follows below.
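To make the three keyword arguments concrete, here is a small sketch using linear_refined_lr, the same function the scheduler factory uses for warmup; the keyword names are taken from that factory call, and the length relationship is the one described above:

```python
from mindcv.scheduler.dynamic_lr import linear_refined_lr

lrs = linear_refined_lr(start_factor=0.1, end_factor=1.0, total_iters=2,
                        lr=0.1, steps_per_epoch=4, epochs=2)
print(len(lrs))  # expected: 8 == steps_per_epoch * epochs
```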

+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every epoch

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py (lines 120-141) +
def cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every epoch"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = math.floor(i / steps_per_epoch)
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
+
+
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every step

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py (lines 144-165) +
def cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every step"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = i / steps_per_epoch
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
+
+
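A tiny worked example contrasting the two variants above (epoch-stair vs. per-step update), computed from the source shown:

```python
from mindcv.scheduler.dynamic_lr import cosine_decay_lr, cosine_decay_refined_lr

# "stair" variant: lr is constant within each epoch
stair = cosine_decay_lr(3, 0.0, eta_max=0.1, steps_per_epoch=2, epochs=3)
# -> [0.1, 0.1, 0.075, 0.075, 0.025, 0.025]

# refined variant: lr also decays between the steps of an epoch
refined = cosine_decay_refined_lr(3, 0.0, eta_max=0.1, steps_per_epoch=2, epochs=3)
# same cosine curve, but intermediate steps are interpolated within each epoch
```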
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cyclic_lr(base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', *, steps_per_epoch, epochs) + +

+ + +
+ +

Cyclic learning rate scheduler based on "Cyclical Learning Rates for Training Neural Networks" (https://arxiv.org/abs/1506.01186).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
base_lr +
+

Lower learning rate boundaries in each cycle.

+
+

+ + TYPE: + float + +

+
max_lr +
+

Upper learning rate boundaries in each cycle.

+
+

+ + TYPE: + float + +

+
step_size_up +
+

Number of steps in the increasing half in each cycle. Default: 2000.

+
+

+ + TYPE: + int + + + DEFAULT: + 2000 + +

+
step_size_down +
+

Number of steps in the decreasing half of each cycle. If step_size_down is None, it is set to step_size_up. Default: None.

+
+

+ + DEFAULT: + None + +

+
div_factor +
+

Initial learning rate via initial_lr = max_lr / div_factor. +Default: 25.0.

+
+

+

+
final_div_factor +
+

Minimum learning rate at the end via +min_lr = initial_lr / final_div_factor. Default: 10000.0.

+
+

+

+
mode +
+

One of {triangular, triangular2, exp_range}. If scale_fn is not None, it's set to +None. Default: 'triangular'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'triangular' + +

+
gamma +
+

Constant in the 'exp_range' scaling function: gamma**(cycle_iterations). Default: 1.0.

+
+

+ + DEFAULT: + 1.0 + +

+
scale_fn +
+

Custom scaling policy defined by a single argument lambda function. If it's +not None, 'mode' is ignored. Default: None

+
+

+ + DEFAULT: + None + +

+
scale_mode +
+

One of {'cycle', 'iterations'}. Determine scale_fn is evaluated on cycle +number or cycle iterations. Default: 'cycle'

+
+

+ + DEFAULT: + 'cycle' + +

+
steps_per_epoch +
+

Number of steps per epoch.

+
+

+ + TYPE: + int + +

+
epochs +
+

Number of total epochs.

+
+

+ + TYPE: + int + +

+
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py (lines 266-342) +
def cyclic_lr(
+    base_lr: float,
+    max_lr: float,
+    step_size_up: int = 2000,
+    step_size_down=None,
+    mode: str = "triangular",
+    gamma=1.0,
+    scale_fn=None,
+    scale_mode="cycle",
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    Cyclic learning rate scheduler based on
+    '"Cyclical Learning Rates for Training Neural Networks" <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        base_lr: Lower learning rate boundaries in each cycle.
+        max_lr: Upper learning rate boundaries in each cycle.
+        step_size_up: Number of steps in the increasing half in each cycle. Default: 2000.
+        step_size_down: Number of steps in the decreasing half in each cycle. If step_size_down
+            is None, it's set to step_size_up. Default: None.
+        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.
+            Default: 25.0.
+        final_div_factor: Minimum learning rate at the end via
+            min_lr = initial_lr / final_div_factor. Default: 10000.0.
+        mode: One of {triangular, triangular2, exp_range}. If scale_fn is not None, it's set to
+            None. Default: 'triangular'.
+        gamma: Constant in the 'exp_range' scaling function: gamma**(cycle_iterations).
+            Default: 1.0
+        scale_fn: Custom scaling policy defined by a single argument lambda function. If it's
+            not None, 'mode' is ignored. Default: None
+        scale_mode: One of {'cycle', 'iterations'}. Determine scale_fn is evaluated on cycle
+            number or cycle iterations. Default: 'cycle'
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _triangular_scale_fn(x):
+        return 1.0
+
+    def _triangular2_scale_fn(x):
+        return 1 / (2.0**(x - 1))
+
+    def _exp_range_scale_fn(x):
+        return gamma**x
+
+    steps = steps_per_epoch * epochs
+    step_size_up = float(step_size_up)
+    step_size_down = float(step_size_down) if step_size_down is not None else step_size_up
+    total_size = step_size_up + step_size_down
+    step_ratio = step_size_up / total_size
+    if scale_fn is None:
+        if mode == "triangular":
+            scale_fn = _triangular_scale_fn
+            scale_mode = "cycle"
+        elif mode == "triangular2":
+            scale_fn = _triangular2_scale_fn
+            scale_mode = "cycle"
+        elif mode == "exp_range":
+            scale_fn = _exp_range_scale_fn
+            scale_mode = "iterations"
+    lrs = []
+    for i in range(steps):
+        cycle = math.floor(1 + i / total_size)
+        x = 1.0 + i / total_size - cycle
+        if x <= step_ratio:
+            scale_factor = x / step_ratio
+        else:
+            scale_factor = (x - 1) / (step_ratio - 1)
+        base_height = (max_lr - base_lr) * scale_factor
+        if scale_mode == "cycle":
+            lrs.append(base_lr + base_height * scale_fn(cycle))
+        else:
+            lrs.append(base_lr + base_height * scale_fn(i))
+    return lrs
+
+
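A minimal sketch of the triangular policy above; the step counts are arbitrary illustrative values:

```python
from mindcv.scheduler.dynamic_lr import cyclic_lr

# a symmetric triangular wave between base_lr and max_lr,
# 4 steps up and 4 steps down per cycle
lrs = cyclic_lr(base_lr=1e-3, max_lr=1e-2, step_size_up=4,
                mode="triangular", steps_per_epoch=4, epochs=4)
```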
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.one_cycle_lr(max_lr, pct_start=0.3, anneal_strategy='cos', div_factor=25.0, final_div_factor=10000.0, three_phase=False, *, steps_per_epoch, epochs) + +

+ + +
+ +

OneCycle learning rate scheduler based on +'"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates" +https://arxiv.org/abs/1708.07120'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
max_lr +
+

Upper learning rate boundaries in the cycle.

+
+

+ + TYPE: + float + +

+
pct_start +
+

The percentage of the number of steps of increasing learning rate +in the cycle. Default: 0.3.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.3 + +

+
anneal_strategy +
+

Define the annealing strategy: "cos" for cosine annealing, +"linear" for linear annealing. Default: "cos".

+
+

+ + TYPE: + str + + + DEFAULT: + 'cos' + +

+
div_factor +
+

Initial learning rate via initial_lr = max_lr / div_factor. +Default: 25.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 25.0 + +

+
final_div_factor +
+

Minimum learning rate at the end via +min_lr = initial_lr / final_div_factor. Default: 10000.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 10000.0 + +

+
three_phase +
+

If True, learning rate will be updated by three-phase according to +"final_div_factor". Otherwise, learning rate will be updated by two-phase. +Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
steps_per_epoch +
+

Number of steps per epoch.

+
+

+ + TYPE: + int + +

+
epochs +
+

Number of total epochs.

+
+

+ + TYPE: + int + +

+
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py (lines 197-263) +
def one_cycle_lr(
+    max_lr: float,
+    pct_start: float = 0.3,
+    anneal_strategy: str = "cos",
+    div_factor: float = 25.0,
+    final_div_factor: float = 10000.0,
+    three_phase: bool = False,
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    OneCycle learning rate scheduler based on
+    '"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates"
+    <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        max_lr: Upper learning rate boundaries in the cycle.
+        pct_start: The percentage of the number of steps of increasing learning rate
+            in the cycle. Default: 0.3.
+        anneal_strategy: Define the annealing strategy: "cos" for cosine annealing,
+            "linear" for linear annealing. Default: "cos".
+        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.
+            Default: 25.0.
+        final_div_factor: Minimum learning rate at the end via
+            min_lr = initial_lr / final_div_factor. Default: 10000.0.
+        three_phase: If True, learning rate will be updated by three-phase according to
+            "final_div_factor". Otherwise, learning rate will be updated by two-phase.
+            Default: False.
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _annealing_cos(start, end, pct):
+        cos_out = math.cos(math.pi * pct) + 1
+        return end + (start - end) / 2.0 * cos_out
+
+    def _annealing_linear(start, end, pct):
+        return (end - start) * pct + start
+
+    initial_lr = max_lr / div_factor
+    min_lr = initial_lr / final_div_factor
+    steps = steps_per_epoch * epochs
+    step_size_up = float(pct_start * steps) - 1
+    step_size_down = float(2 * pct_start * steps) - 2
+    step_size_end = float(steps) - 1
+    if anneal_strategy == "cos":
+        anneal_func = _annealing_cos
+    elif anneal_strategy == "linear":
+        anneal_func = _annealing_linear
+    else:
+        raise ValueError(f"anneal_strategy must be one of 'cos' or 'linear', but got {anneal_strategy}")
+    lrs = []
+    for i in range(steps):
+        if three_phase:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            elif step_size_up < i <= step_size_down:
+                lrs.append(anneal_func(max_lr, initial_lr, (i - step_size_up) / (step_size_down - step_size_up)))
+            else:
+                lrs.append(anneal_func(initial_lr, min_lr, (i - step_size_down) / (step_size_end - step_size_down)))
+        else:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            else:
+                lrs.append(anneal_func(max_lr, min_lr, (i - step_size_up) / (step_size_end - step_size_up)))
+    return lrs
+
+
+
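A minimal usage sketch of the generator above (values are illustrative only):
+
+from mindcv.scheduler.dynamic_lr import one_cycle_lr
+
+# Build the per-step learning-rate list for 10 epochs of 100 steps each.
+lrs = one_cycle_lr(max_lr=0.1, pct_start=0.3, anneal_strategy="cos",
+                   steps_per_epoch=100, epochs=10)
+print(len(lrs))          # 1000 entries, one per step
+print(lrs[0], max(lrs))  # starts at max_lr / div_factor, peaks at max_lr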
+ +
+ + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/reference/utils/index.html b/en/reference/utils/index.html new file mode 100644 index 000000000..08bc2bf74 --- /dev/null +++ b/en/reference/utils/index.html @@ -0,0 +1,3282 @@ + + + + + + + + + + + + + + + + + + + + + + + + utils - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Utility

+

Logger

+ + + +
+ + + +

+mindcv.utils.logger.set_logger(name=None, output_dir=None, rank=0, log_level=logging.INFO, color=True) + +

+ + +
+ +

Initialize the logger.

+

If the logger has not been initialized, this method will initialize the +logger by adding one or two handlers; otherwise the already initialized logger will +be returned directly. During initialization, only the logger of the master +process is given a console handler. If output_dir is specified, all loggers +are also given a file handler.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

Logger name. Defaults to None to set up root logger.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
output_dir +
+

The directory to save log.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
rank +
+

Process rank in the distributed training. Defaults to 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
log_level +
+

Verbosity level of the logger. Defaults to logging.INFO.

+
+

+ + TYPE: + int + + + DEFAULT: + logging.INFO + +

+
color +
+

If True, color the output. Defaults to True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + logging.Logger + + +
+

logging.Logger: An initialized logger.

+
+
+ +
+ Source code in mindcv/utils/logger.py +
 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
def set_logger(
+    name: Optional[str] = None,
+    output_dir: Optional[str] = None,
+    rank: int = 0,
+    log_level: int = logging.INFO,
+    color: bool = True,
+) -> logging.Logger:
+    """Initialize the logger.
+
+    If the logger has not been initialized, this method will initialize the
+    logger by adding one or two handlers, otherwise the initialized logger will
+    be directly returned. During initialization, only logger of the master
+    process is added console handler. If ``output_dir`` is specified, all loggers
+    will be added file handler.
+
+    Args:
+        name: Logger name. Defaults to None to set up root logger.
+        output_dir: The directory to save log.
+        rank: Process rank in the distributed training. Defaults to 0.
+        log_level: Verbosity level of the logger. Defaults to ``logging.INFO``.
+        color: If True, color the output. Defaults to True.
+
+    Returns:
+        logging.Logger: A initialized logger.
+    """
+    if name in logger_initialized:
+        return logger_initialized[name]
+
+    # get root logger if name is None
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level)
+    # the messages of this logger will not be propagated to its parent
+    logger.propagate = False
+
+    fmt = "%(asctime)s %(name)s %(levelname)s - %(message)s"
+    datefmt = "[%Y-%m-%d %H:%M:%S]"
+
+    # create console handler for master process
+    if rank == 0:
+        if color:
+            if has_rich:
+                console_handler = RichHandler(level=log_level, log_time_format=datefmt)
+            elif has_termcolor:
+                console_handler = logging.StreamHandler(stream=sys.stdout)
+                console_handler.setLevel(log_level)
+                console_handler.setFormatter(_ColorfulFormatter(fmt=fmt, datefmt=datefmt))
+            else:
+                raise NotImplementedError("If you want color, 'rich' or 'termcolor' has to be installed!")
+        else:
+            console_handler = logging.StreamHandler(stream=sys.stdout)
+            console_handler.setLevel(log_level)
+            console_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(console_handler)
+
+    if output_dir is not None:
+        os.makedirs(output_dir, exist_ok=True)
+        file_handler = logging.FileHandler(os.path.join(output_dir, f"rank{rank}.log"))
+        file_handler.setLevel(log_level)
+        file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(file_handler)
+
+    logger_initialized[name] = logger
+    return logger
+
+
+
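A short usage sketch (the logger name and output directory below are arbitrary examples; color=False avoids the rich/termcolor requirement):
+
+from mindcv.utils.logger import set_logger
+
+# Rank 0 gets a console handler; because output_dir is given, this rank also
+# writes to ./logs/rank0.log.
+logger = set_logger(name="mindcv", output_dir="./logs", rank=0, color=False)
+logger.info("Training started")
+
+# Calling set_logger again with the same name returns the cached logger.
+assert set_logger(name="mindcv") is logger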
+ +

Callbacks

+ + +
+ + + +

+ mindcv.utils.callbacks.StateMonitor + + +

+ + +
+

+ Bases: Callback

+ + +

Monitors the train loss and validation accuracy and, after each epoch, saves the +checkpoint with the highest validation accuracy as the best checkpoint file.

+ +
+ Source code in mindcv/utils/callbacks.py +
 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
class StateMonitor(Callback):
+    """
+    Train loss and validation accuracy monitor, after each epoch save the
+    best checkpoint file with the highest validation accuracy.
+    """
+
+    def __init__(
+        self,
+        model,
+        model_name="",
+        model_ema=False,
+        last_epoch=0,
+        dataset_sink_mode=True,
+        dataset_val=None,
+        metric_name=("accuracy",),
+        val_interval=1,
+        val_start_epoch=1,
+        save_best_ckpt=True,
+        ckpt_save_dir="./",
+        ckpt_save_interval=1,
+        ckpt_save_policy=None,
+        ckpt_keep_max=10,
+        summary_dir="./",
+        log_interval=100,
+        rank_id=None,
+        device_num=None,
+    ):
+        super().__init__()
+        # model
+        self.model = model
+        self.model_name = model_name
+        self.model_ema = model_ema
+        self.last_epoch = last_epoch
+        self.dataset_sink_mode = dataset_sink_mode
+        # evaluation
+        self.dataset_val = dataset_val
+        self.metric_name = metric_name
+        self.val_interval = val_interval
+        self.val_start_epoch = val_start_epoch
+        # logging
+        self.best_res = 0
+        self.best_epoch = -1
+        self.save_best_ckpt = save_best_ckpt
+        self.ckpt_save_dir = ckpt_save_dir
+        self.ckpt_save_interval = ckpt_save_interval
+        self.ckpt_save_policy = ckpt_save_policy
+        self.ckpt_keep_max = ckpt_keep_max
+        self.ckpt_manager = CheckpointManager(ckpt_save_policy=self.ckpt_save_policy)
+        self._need_flush_from_cache = True
+        self.summary_dir = summary_dir
+        self.log_interval = log_interval
+        # system
+        self.rank_id = rank_id if rank_id is not None else 0
+        self.device_num = device_num if rank_id is not None else 1
+        if self.rank_id in [0, None]:
+            os.makedirs(ckpt_save_dir, exist_ok=True)
+            self.log_file = os.path.join(ckpt_save_dir, "result.log")
+            log_line = "".join(
+                f"{s:<20}" for s in ["Epoch", "TrainLoss", *metric_name, "TrainTime", "EvalTime", "TotalTime"]
+            )
+            with open(self.log_file, "w", encoding="utf-8") as fp:  # writing the title of result.log
+                fp.write(log_line + "\n")
+        if self.device_num > 1:
+            self.all_reduce = AllReduceSum()
+        # timestamp
+        self.step_ts = None
+        self.epoch_ts = None
+        self.step_time_accum = 0
+        # model_ema
+        if self.model_ema:
+            self.hyper_map = ops.HyperMap()
+            self.online_params = ParameterTuple(self.model.train_network.get_parameters())
+            self.swap_params = self.online_params.clone("swap", "zeros")
+
+    def __enter__(self):
+        self.summary_record = SummaryRecord(self.summary_dir)
+        return self
+
+    def __exit__(self, *exc_args):
+        self.summary_record.close()
+
+    def apply_eval(self, run_context):
+        """Model evaluation, return validation accuracy."""
+        if self.model_ema:
+            cb_params = run_context.original_args()
+            self.hyper_map(ops.assign, self.swap_params, self.online_params)
+            ema_dict = dict()
+            net = self._get_network_from_cbp(cb_params)
+            for param in net.get_parameters():
+                if param.name.startswith("ema"):
+                    new_name = param.name.split("ema.")[1]
+                    ema_dict[new_name] = param.data
+            load_param_into_net(self.model.train_network.network, ema_dict)
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+            self.hyper_map(ops.assign, self.online_params, self.swap_params)
+        else:
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+        if self.device_num > 1:
+            res_array = self.all_reduce(res_array)
+            res_array /= self.device_num
+        res_array = res_array.asnumpy()
+        return res_array
+
+    def on_train_step_begin(self, run_context):
+        self.step_ts = time()
+
+    def on_train_epoch_begin(self, run_context):
+        self.epoch_ts = time()
+
+    def on_train_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        # num_steps = num_batches * num_epochs
+        # cur_x start from 1, end at num_xs, range: [1, num_xs]
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        self.step_time_accum += time() - self.step_ts
+        if cur_batch % self.log_interval == 0 or cur_batch == num_batches or cur_batch == 1:
+            lr = self._get_lr_from_cbp(cb_params)
+            loss = self._get_loss_from_cbp(cb_params)
+            _logger.info(
+                f"Epoch: [{cur_epoch}/{num_epochs}], "
+                f"batch: [{cur_batch}/{num_batches}], "
+                f"loss: {loss.asnumpy():.6f}, "
+                f"lr: {lr.asnumpy():.6f}, "
+                f"time: {self.step_time_accum:.6f}s"
+            )
+            self.step_time_accum = 0
+
+    def on_train_epoch_end(self, run_context):
+        """
+        After epoch, print train loss and val accuracy,
+        save the best ckpt file with the highest validation accuracy.
+        """
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        train_time = time() - self.epoch_ts
+        loss = self._get_loss_from_cbp(cb_params)
+
+        val_time = 0
+        res = np.zeros(len(self.metric_name), dtype=np.float32)
+        # val while training if validation loader is not None
+        if (
+            self.dataset_val is not None
+            and cur_epoch >= self.val_start_epoch
+            and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+        ):
+            val_time = time()
+            res = self.apply_eval(run_context)
+            val_time = time() - val_time
+            # record val acc
+            metric_str = "Validation "
+            for i in range(len(self.metric_name)):
+                metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+            metric_str += f"time: {val_time:.6f}s"
+            _logger.info(metric_str)
+            # save the best ckpt file
+            if res[0] > self.best_res:
+                self.best_res = res[0]
+                self.best_epoch = cur_epoch
+                _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+        # save checkpoint
+        if self.rank_id in [0, None]:
+            if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+                best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+                save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+            if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+                if self._need_flush_from_cache:
+                    self._flush_from_cache(cb_params)
+                # save optim for resume
+                optimizer = self._get_optimizer_from_cbp(cb_params)
+                optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+                save_checkpoint(optimizer, optim_save_path, async_save=True)
+                # keep checkpoint files number equal max number.
+                ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+                _logger.info(f"Saving model to {ckpt_save_path}")
+                self.ckpt_manager.save_ckpoint(
+                    cb_params.train_network,
+                    num_ckpt=self.ckpt_keep_max,
+                    metric=res[0],
+                    save_path=ckpt_save_path,
+                )
+
+        # logging
+        total_time = time() - self.epoch_ts
+        _logger.info(
+            f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+            f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+        )
+        _logger.info("-" * 80)
+        if self.rank_id in [0, None]:
+            log_line = "".join(
+                f"{s:<20}"
+                for s in [
+                    f"{cur_epoch}",
+                    f"{loss.asnumpy():.6f}",
+                    *[f"{i:.4%}" for i in res],
+                    f"{train_time:.2f}",
+                    f"{val_time:.2f}",
+                    f"{total_time:.2f}",
+                ]
+            )
+            with open(self.log_file, "a", encoding="utf-8") as fp:
+                fp.write(log_line + "\n")
+
+        # summary
+        self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+        for i in range(len(res)):
+            self.summary_record.add_value(
+                "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+            )
+        self.summary_record.record(cur_step)
+
+    def on_train_end(self, run_context):
+        _logger.info("Finish training!")
+        if self.dataset_val is not None:
+            _logger.info(
+                f"The best validation {self.metric_name[0]} is: {self.best_res:.4%} at epoch {self.best_epoch}."
+            )
+        _logger.info("=" * 80)
+
+    def _get_network_from_cbp(self, cb_params):
+        if self.dataset_sink_mode:
+            network = cb_params.train_network.network
+        else:
+            network = cb_params.train_network
+        return network
+
+    def _get_optimizer_from_cbp(self, cb_params):
+        if cb_params.optimizer is not None:
+            optimizer = cb_params.optimizer
+        elif self.dataset_sink_mode:
+            optimizer = cb_params.train_network.network.optimizer
+        else:
+            optimizer = cb_params.train_network.optimizer
+        return optimizer
+
+    def _get_lr_from_cbp(self, cb_params):
+        optimizer = self._get_optimizer_from_cbp(cb_params)
+        if optimizer.global_step < 1:
+            _logger.warning(
+                "`global_step` of optimizer is less than 1. It seems to be a overflow at the first step. "
+                "If you keep seeing this message, it means that the optimizer never actually called."
+            )
+            optim_step = Tensor((0,), ms.int32)
+        else:  # if the optimizer is successfully called, the global_step will actually be the value of next step.
+            optim_step = optimizer.global_step - 1
+        if optimizer.dynamic_lr:
+            lr = optimizer.learning_rate(optim_step)[0]
+        else:
+            lr = optimizer.learning_rate
+        return lr
+
+    def _get_loss_from_cbp(self, cb_params):
+        """
+        Get loss from the network output.
+        Args:
+            cb_params (_InternalCallbackParam): Callback parameters.
+        Returns:
+            Union[Tensor, None], if parse loss success, will return a Tensor value(shape is [1]), else return None.
+        """
+        output = cb_params.net_outputs
+        if output is None:
+            _logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
+            return None
+
+        if isinstance(output, (int, float, Tensor)):
+            loss = output
+        elif isinstance(output, (list, tuple)) and output:
+            # If the output is a list, since the default network returns loss first,
+            # we assume that the first one is loss.
+            loss = output[0]
+        else:
+            _logger.warning(
+                "The output type could not be identified, expect type is one of "
+                "[int, float, Tensor, list, tuple], so no loss was recorded in SummaryCollector."
+            )
+            return None
+
+        if not isinstance(loss, Tensor):
+            loss = Tensor(loss)
+
+        loss = Tensor(np.mean(loss.asnumpy()))
+        return loss
+
+    def _flush_from_cache(self, cb_params):
+        """Flush cache data to host if tensor is cache enable."""
+        has_cache_params = False
+        params = cb_params.train_network.get_parameters()
+        for param in params:
+            if param.cache_enable:
+                has_cache_params = True
+                Tensor(param).flush_from_cache()
+        if not has_cache_params:
+            self._need_flush_from_cache = False
+
+
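A rough usage sketch, assuming a mindspore.Model named model and data loaders loader_train / loader_val already exist (all argument values are illustrative):
+
+from mindcv.utils.callbacks import StateMonitor
+
+state_cb = StateMonitor(
+    model,                       # the mindspore.Model being trained (assumed to exist)
+    model_name="resnet50",
+    dataset_val=loader_val,      # run validation while training
+    metric_name=("accuracy",),
+    ckpt_save_dir="./ckpt",
+    log_interval=100,
+)
+# Passed to Model.train like any other MindSpore callback.
+model.train(90, loader_train, callbacks=[state_cb], dataset_sink_mode=True)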
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.apply_eval(run_context) + +

+ + +
+ +

Model evaluation, return validation accuracy.

+ +
+ Source code in mindcv/utils/callbacks.py +
104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
def apply_eval(self, run_context):
+    """Model evaluation, return validation accuracy."""
+    if self.model_ema:
+        cb_params = run_context.original_args()
+        self.hyper_map(ops.assign, self.swap_params, self.online_params)
+        ema_dict = dict()
+        net = self._get_network_from_cbp(cb_params)
+        for param in net.get_parameters():
+            if param.name.startswith("ema"):
+                new_name = param.name.split("ema.")[1]
+                ema_dict[new_name] = param.data
+        load_param_into_net(self.model.train_network.network, ema_dict)
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        self.hyper_map(ops.assign, self.online_params, self.swap_params)
+    else:
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+    res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+    if self.device_num > 1:
+        res_array = self.all_reduce(res_array)
+        res_array /= self.device_num
+    res_array = res_array.asnumpy()
+    return res_array
+
+
+
+ +
+ + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.on_train_epoch_end(run_context) + +

+ + +
+ +

After each epoch, print the train loss and validation accuracy, and +save the ckpt file with the highest validation accuracy as the best ckpt.

+ +
+ Source code in mindcv/utils/callbacks.py +
156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
def on_train_epoch_end(self, run_context):
+    """
+    After epoch, print train loss and val accuracy,
+    save the best ckpt file with the highest validation accuracy.
+    """
+    cb_params = run_context.original_args()
+    num_epochs = cb_params.epoch_num
+    num_batches = cb_params.batch_num
+    cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+    cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+    cur_batch = (cur_step - 1) % num_batches + 1
+
+    train_time = time() - self.epoch_ts
+    loss = self._get_loss_from_cbp(cb_params)
+
+    val_time = 0
+    res = np.zeros(len(self.metric_name), dtype=np.float32)
+    # val while training if validation loader is not None
+    if (
+        self.dataset_val is not None
+        and cur_epoch >= self.val_start_epoch
+        and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+    ):
+        val_time = time()
+        res = self.apply_eval(run_context)
+        val_time = time() - val_time
+        # record val acc
+        metric_str = "Validation "
+        for i in range(len(self.metric_name)):
+            metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+        metric_str += f"time: {val_time:.6f}s"
+        _logger.info(metric_str)
+        # save the best ckpt file
+        if res[0] > self.best_res:
+            self.best_res = res[0]
+            self.best_epoch = cur_epoch
+            _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+    # save checkpoint
+    if self.rank_id in [0, None]:
+        if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+            best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+            save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+        if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+            if self._need_flush_from_cache:
+                self._flush_from_cache(cb_params)
+            # save optim for resume
+            optimizer = self._get_optimizer_from_cbp(cb_params)
+            optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+            save_checkpoint(optimizer, optim_save_path, async_save=True)
+            # keep checkpoint files number equal max number.
+            ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+            _logger.info(f"Saving model to {ckpt_save_path}")
+            self.ckpt_manager.save_ckpoint(
+                cb_params.train_network,
+                num_ckpt=self.ckpt_keep_max,
+                metric=res[0],
+                save_path=ckpt_save_path,
+            )
+
+    # logging
+    total_time = time() - self.epoch_ts
+    _logger.info(
+        f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+        f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+    )
+    _logger.info("-" * 80)
+    if self.rank_id in [0, None]:
+        log_line = "".join(
+            f"{s:<20}"
+            for s in [
+                f"{cur_epoch}",
+                f"{loss.asnumpy():.6f}",
+                *[f"{i:.4%}" for i in res],
+                f"{train_time:.2f}",
+                f"{val_time:.2f}",
+                f"{total_time:.2f}",
+            ]
+        )
+        with open(self.log_file, "a", encoding="utf-8") as fp:
+            fp.write(log_line + "\n")
+
+    # summary
+    self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+    for i in range(len(res)):
+        self.summary_record.add_value(
+            "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+        )
+    self.summary_record.record(cur_step)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ mindcv.utils.callbacks.ValCallback + + +

+ + +
+

+ Bases: Callback

+ + +
+ Source code in mindcv/utils/callbacks.py +
330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
class ValCallback(Callback):
+    def __init__(self, log_interval=100):
+        super().__init__()
+        self.log_interval = log_interval
+        self.ts = time()
+
+    def on_eval_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num
+
+        if cur_step % self.log_interval == 0 or cur_step == num_batches:
+            print(f"batch: {cur_step}/{num_batches}, time: {time() - self.ts:.6f}s")
+            self.ts = time()
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Train Step

+ + +
+ + + +

+ mindcv.utils.train_step.TrainStep + + +

+ + +
+

+ Bases: nn.TrainOneStepWithLossScaleCell

+ + +

Training step with loss scale.

+ +
+ This customized train-one-step cell also supports the following algorithms: +
    +
  • Exponential Moving Average (EMA)
  • +
  • Gradient Clipping
  • +
  • Gradient Accumulation
  • +
+
+
+ Source code in mindcv/utils/train_step.py +
 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
class TrainStep(nn.TrainOneStepWithLossScaleCell):
+    """Training step with loss scale.
+
+    The customized trainOneStepCell also supported following algorithms:
+        * Exponential Moving Average (EMA)
+        * Gradient Clipping
+        * Gradient Accumulation
+    """
+
+    def __init__(
+        self,
+        network,
+        optimizer,
+        scale_sense=1.0,
+        ema=False,
+        ema_decay=0.9999,
+        clip_grad=False,
+        clip_value=15.0,
+        gradient_accumulation_steps=1,
+    ):
+        super(TrainStep, self).__init__(network, optimizer, scale_sense)
+        self.ema = ema
+        self.ema_decay = ema_decay
+        self.updates = Parameter(Tensor(0.0, ms.float32))
+        self.clip_grad = clip_grad
+        self.clip_value = clip_value
+        if self.ema:
+            self.weights_all = ms.ParameterTuple(list(network.get_parameters()))
+            self.ema_weight = self.weights_all.clone("ema", init="same")
+
+        self.accumulate_grad = gradient_accumulation_steps > 1
+        if self.accumulate_grad:
+            self.gradient_accumulation = GradientAccumulation(gradient_accumulation_steps, optimizer, self.grad_reducer)
+
+    def ema_update(self):
+        self.updates += 1
+        # ema factor is corrected by (1 - exp(-t/T)), where `t` means time and `T` means temperature.
+        ema_decay = self.ema_decay * (1 - F.exp(-self.updates / 2000))
+        # update trainable parameters
+        success = self.hyper_map(F.partial(_ema_op, ema_decay), self.ema_weight, self.weights_all)
+        return success
+
+    def construct(self, *inputs):
+        weights = self.weights
+        loss = self.network(*inputs)
+        scaling_sens = self.scale_sense
+
+        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)
+
+        scaling_sens_filled = ops.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
+        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
+        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
+
+        # todo: When to clip grad? Do we need to clip grad after grad reduction? What if grad accumulation is needed?
+        if self.clip_grad:
+            grads = ops.clip_by_global_norm(grads, clip_norm=self.clip_value)
+
+        if self.loss_scaling_manager:  # scale_sense = update_cell: Cell --> TrainOneStepWithLossScaleCell.construct
+            if self.accumulate_grad:
+                # todo: GradientAccumulation only call grad_reducer at the step where the accumulation is completed.
+                #  So checking the overflow status is after gradient reduction, is this correct?
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = self.gradient_accumulation(loss, grads)
+            else:
+                # apply grad reducer on grads
+                grads = self.grad_reducer(grads)
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = F.depend(loss, self.optimizer(grads))
+        else:  # scale_sense = loss_scale: Tensor --> TrainOneStepCell.construct
+            if self.accumulate_grad:
+                loss = self.gradient_accumulation(loss, grads)
+            else:
+                grads = self.grad_reducer(grads)
+                loss = F.depend(loss, self.optimizer(grads))
+
+        if self.ema:
+            loss = F.depend(loss, self.ema_update())
+
+        return loss
+
+
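A rough construction sketch, assuming net_with_loss (a cell that returns the loss) and optimizer already exist; here scale_sense is a plain fixed loss-scale tensor:
+
+import mindspore as ms
+from mindspore import Tensor
+from mindcv.utils.train_step import TrainStep
+
+train_step = TrainStep(
+    network=net_with_loss,                   # cell returning the loss (assumed to exist)
+    optimizer=optimizer,                     # any MindSpore optimizer (assumed to exist)
+    scale_sense=Tensor(1024.0, ms.float32),  # fixed loss scale
+    ema=True,
+    ema_decay=0.9999,
+    clip_grad=True,
+    clip_value=15.0,
+).set_train()
+
+# One call runs forward, backward, optional clipping/EMA and the optimizer update.
+loss = train_step(*batch)  # batch: the tuple yielded by the training data loader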
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Trainer Factory

+ + + +
+ + + +

+mindcv.utils.trainer_factory.create_trainer(network, loss, optimizer, metrics, amp_level, amp_cast_list, loss_scale_type, loss_scale=1.0, drop_overflow_update=False, ema=False, ema_decay=0.9999, clip_grad=False, clip_value=15.0, gradient_accumulation_steps=1) + +

+ + +
+ +

Create Trainer.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
network +
+

The backbone network to train, evaluate or predict.

+
+

+ + TYPE: + nn.Cell + +

+
loss +
+

The function of calculating loss.

+
+

+ + TYPE: + nn.Cell + +

+
optimizer +
+

The optimizer for training.

+
+

+ + TYPE: + nn.Cell + +

+
metrics +
+

The metrics for model evaluation.

+
+

+ + TYPE: + Union[dict, set] + +

+
amp_level +
+

The level of automatic mixed precision training.

+
+

+ + TYPE: + str + +

+
amp_cast_list +
+

At the cell level, custom cast the specified cells to FP16.

+
+

+ + TYPE: + str + +

+
loss_scale_type +
+

The type of loss scale.

+
+

+ + TYPE: + str + +

+
loss_scale +
+

The value of loss scale.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
drop_overflow_update +
+

Whether to execute optimizer if there is an overflow.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
ema +
+

Whether to use exponential moving average of model weights.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
ema_decay +
+

Decay factor for model weights moving average.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.9999 + +

+
clip_grad +
+

Whether to apply gradient clipping.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
clip_value +
+

The value at which to clip gradients.

+
+

+ + TYPE: + float + + + DEFAULT: + 15.0 + +

+
gradient_accumulation_steps +
+

Accumulate the gradients of n batches before update.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

mindspore.Model

+
+
+ +
+ Source code in mindcv/utils/trainer_factory.py +
 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
def create_trainer(
+    network: nn.Cell,
+    loss: nn.Cell,
+    optimizer: nn.Cell,
+    metrics: Union[dict, set],
+    amp_level: str,
+    amp_cast_list: str,
+    loss_scale_type: str,
+    loss_scale: float = 1.0,
+    drop_overflow_update: bool = False,
+    ema: bool = False,
+    ema_decay: float = 0.9999,
+    clip_grad: bool = False,
+    clip_value: float = 15.0,
+    gradient_accumulation_steps: int = 1,
+):
+    """Create Trainer.
+
+    Args:
+        network: The backbone network to train, evaluate or predict.
+        loss: The function of calculating loss.
+        optimizer: The optimizer for training.
+        metrics: The metrics for model evaluation.
+        amp_level: The level of auto mixing precision training.
+        amp_cast_list: At the cell level, custom casting the cell to FP16.
+        loss_scale_type: The type of loss scale.
+        loss_scale: The value of loss scale.
+        drop_overflow_update: Whether to execute optimizer if there is an overflow.
+        ema: Whether to use exponential moving average of model weights.
+        ema_decay: Decay factor for model weights moving average.
+        clip_grad: whether to gradient clip.
+        clip_value: The value at which to clip gradients.
+        gradient_accumulation_steps: Accumulate the gradients of n batches before update.
+
+    Returns:
+        mindspore.Model
+
+    """
+    if loss_scale < 1.0:
+        raise ValueError("Loss scale cannot be less than 1.0!")
+
+    if drop_overflow_update is False and loss_scale_type.lower() == "dynamic":
+        raise ValueError("DynamicLossScale ALWAYS drop overflow!")
+
+    if gradient_accumulation_steps < 1:
+        raise ValueError("`gradient_accumulation_steps` must be >= 1!")
+
+    if not require_customized_train_step(ema, clip_grad, gradient_accumulation_steps, amp_cast_list):
+        mindspore_kwargs = dict(
+            network=network,
+            loss_fn=loss,
+            optimizer=optimizer,
+            metrics=metrics,
+            amp_level=amp_level,
+        )
+        if loss_scale_type.lower() == "fixed":
+            mindspore_kwargs["loss_scale_manager"] = FixedLossScaleManager(
+                loss_scale=loss_scale, drop_overflow_update=drop_overflow_update
+            )
+        elif loss_scale_type.lower() == "dynamic":
+            mindspore_kwargs["loss_scale_manager"] = DynamicLossScaleManager(
+                init_loss_scale=loss_scale, scale_factor=2, scale_window=2000
+            )
+        elif loss_scale_type.lower() == "auto":
+            # We don't explicitly construct LossScaleManager
+            _logger.warning(
+                "You are using AUTO loss scale, which means the LossScaleManager isn't explicitly pass in "
+                "when creating a mindspore.Model instance. "
+                "NOTE: mindspore.Model may use LossScaleManager silently. See mindspore.train.amp for details."
+            )
+        else:
+            raise ValueError(f"Loss scale type only support ['fixed', 'dynamic', 'auto'], but got{loss_scale_type}.")
+        model = Model(**mindspore_kwargs)
+    else:  # require customized train step
+        eval_network = nn.WithEvalCell(network, loss, amp_level in ["O2", "O3", "auto"])
+        auto_mixed_precision(network, amp_level, amp_cast_list)
+        net_with_loss = add_loss_network(network, loss, amp_level)
+        train_step_kwargs = dict(
+            network=net_with_loss,
+            optimizer=optimizer,
+            ema=ema,
+            ema_decay=ema_decay,
+            clip_grad=clip_grad,
+            clip_value=clip_value,
+            gradient_accumulation_steps=gradient_accumulation_steps,
+        )
+        if loss_scale_type.lower() == "fixed":
+            loss_scale_manager = FixedLossScaleManager(loss_scale=loss_scale, drop_overflow_update=drop_overflow_update)
+        elif loss_scale_type.lower() == "dynamic":
+            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=loss_scale, scale_factor=2, scale_window=2000)
+        else:
+            raise ValueError(f"Loss scale type only support ['fixed', 'dynamic'], but got{loss_scale_type}.")
+        update_cell = loss_scale_manager.get_update_cell()
+        # 1. loss_scale_type="fixed", drop_overflow_update=False
+        # --> update_cell=None, TrainStep=TrainOneStepCell(scale_sense=loss_scale)
+        # 2. loss_scale_type: fixed, drop_overflow_update: True
+        # --> update_cell=FixedLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        # 3. loss_scale_type: dynamic, drop_overflow_update: True
+        # --> update_cell=DynamicLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        if update_cell is None:
+            train_step_kwargs["scale_sense"] = Tensor(loss_scale, dtype=ms.float32)
+        else:
+            if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU":
+                raise ValueError(
+                    "Only `loss_scale_type` is `fixed` and `drop_overflow_update` is `False`"
+                    "are supported on device `CPU`."
+                )
+            train_step_kwargs["scale_sense"] = update_cell
+        train_step_cell = TrainStep(**train_step_kwargs).set_train()
+        model = Model(train_step_cell, eval_network=eval_network, metrics=metrics, eval_indexes=[0, 1, 2])
+        # todo: do we need to set model._loss_scale_manager
+    return model
+
+
+
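A rough usage sketch, assuming network, loss, optimizer and loader_train already exist (argument values are illustrative, and amp_cast_list is assumed to accept None when no custom cast list is needed):
+
+from mindcv.utils.trainer_factory import create_trainer
+
+trainer = create_trainer(
+    network=network,
+    loss=loss,
+    optimizer=optimizer,
+    metrics={"accuracy"},
+    amp_level="O0",
+    amp_cast_list=None,          # no custom FP16 cast list (assumption)
+    loss_scale_type="fixed",
+    loss_scale=1024.0,
+    drop_overflow_update=False,
+)
+# The returned object is a mindspore.Model, so training proceeds as usual.
+trainer.train(90, loader_train, dataset_sink_mode=True)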
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/configuration/index.html b/en/tutorials/configuration/index.html new file mode 100644 index 000000000..dbab770bd --- /dev/null +++ b/en/tutorials/configuration/index.html @@ -0,0 +1,1635 @@ + + + + + + + + + + + + + + + + + + + + + + + + Configuration - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Configuration

+

Download Notebook

+

MindCV can parse the model's yaml file with the argparse and PyYAML libraries to configure parameters. +Let's use the squeezenet_1.0 model as an example to explain how to configure the corresponding parameters.

+

Basic Environment

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    mode: Use graph mode (0) or pynative mode (1).

    +
  • +
  • +

    distribute: Whether to use distributed training.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    mode: 0
    +distribute: True
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py --mode 0 --distribute False ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    +

    args.mode holds the value of the mode parameter, and args.distribute holds the value of the distribute parameter.

    +
    +
    def train(args):
    +    ms.set_context(mode=args.mode)
    +
    +    if args.distribute:
    +        init()
    +        device_num = get_group_size()
    +        rank_id = get_rank()
    +        ms.set_auto_parallel_context(device_num=device_num,
    +                                     parallel_mode='data_parallel',
    +                                     gradients_mean=True)
    +    else:
    +        device_num = None
    +        rank_id = None
    +    ...
    +
    +
  6. +
+

Dataset

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    dataset: dataset name.

    +
  • +
  • +

    data_dir: Path of dataset file.

    +
  • +
  • +

    shuffle: whether to shuffle the dataset.

    +
  • +
  • +

    dataset_download: whether to download the dataset.

    +
  • +
  • +

    batch_size: The number of rows in each batch.

    +
  • +
  • +

    drop_remainder: Determines whether to drop the last block whose data row number is less than the batch size.

    +
  • +
  • +

    num_parallel_workers: Number of workers(threads) to process the dataset in parallel.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    dataset: 'imagenet'
    +data_dir: './imagenet2012'
    +shuffle: True
    +dataset_download: False
    +batch_size: 32
    +drop_remainder: True
    +num_parallel_workers: 8
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --dataset imagenet --data_dir ./imagenet2012 --shuffle True \
    +    --dataset_download False --batch_size 32 --drop_remainder True \
    +    --num_parallel_workers 8 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    dataset_train = create_dataset(
    +        name=args.dataset,
    +        root=args.data_dir,
    +        split='train',
    +        shuffle=args.shuffle,
    +        num_samples=args.num_samples,
    +        num_shards=device_num,
    +        shard_id=rank_id,
    +        num_parallel_workers=args.num_parallel_workers,
    +        download=args.dataset_download,
    +        num_aug_repeats=args.aug_repeats)
    +
    +    ...
    +    target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None
    +
    +    loader_train = create_loader(
    +        dataset=dataset_train,
    +        batch_size=args.batch_size,
    +        drop_remainder=args.drop_remainder,
    +        is_training=True,
    +        mixup=args.mixup,
    +        cutmix=args.cutmix,
    +        cutmix_prob=args.cutmix_prob,
    +        num_classes=args.num_classes,
    +        transform=transform_list,
    +        target_transform=target_transform,
    +        num_parallel_workers=args.num_parallel_workers,
    +    )
    +    ...
    +
    +
  6. +
+

Data Augmentation

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    image_resize: the image size after resizing, used to adapt the input to the network.

    +
  • +
  • +

    scale: random resize scale.

    +
  • +
  • +

    ratio: random resize aspect ratio.

    +
  • +
  • +

    hflip: probability of horizontal flip augmentation during training.

    +
  • +
  • +

    interpolation: image interpolation mode for resize operator.

    +
  • +
  • +

    crop_pct: input image center crop percent.

    +
  • +
  • +

    color_jitter: color jitter factor.

    +
  • +
  • +

    re_prob: the probability of performing random erasing.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    image_resize: 224
    +scale: [0.08, 1.0]
    +ratio: [0.75, 1.333]
    +hflip: 0.5
    +interpolation: 'bilinear'
    +crop_pct: 0.875
    +color_jitter: [0.4, 0.4, 0.4]
    +re_prob: 0.5
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --image_resize 224 --scale [0.08, 1.0] --ratio [0.75, 1.333] \
    +    --hflip 0.5 --interpolation "bilinear" --crop_pct 0.875 \
    +    --color_jitter [0.4, 0.4, 0.4] --re_prob 0.5 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    transform_list = create_transforms(
    +        dataset_name=args.dataset,
    +        is_training=True,
    +        image_resize=args.image_resize,
    +        scale=args.scale,
    +        ratio=args.ratio,
    +        hflip=args.hflip,
    +        vflip=args.vflip,
    +        color_jitter=args.color_jitter,
    +        interpolation=args.interpolation,
    +        auto_augment=args.auto_augment,
    +        mean=args.mean,
    +        std=args.std,
    +        re_prob=args.re_prob,
    +        re_scale=args.re_scale,
    +        re_ratio=args.re_ratio,
    +        re_value=args.re_value,
    +        re_max_attempts=args.re_max_attempts
    +    )
    +    ...
    +
    +
  6. +
+

Model

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    model: model name.

    +
  • +
  • +

    num_classes: number of label classes.

    +
  • +
  • +

    pretrained: whether to load the pretrained model.

    +
  • +
  • +

    ckpt_path: initialize model from this checkpoint.

    +
  • +
  • +

    keep_checkpoint_max: max number of checkpoint files.

    +
  • +
  • +

    ckpt_save_dir: the directory in which to save checkpoints.

    +
  • +
  • +

    epoch_size: number of training epochs.

    +
  • +
  • +

    dataset_sink_mode: the dataset sink mode.

    +
  • +
  • +

    amp_level: automatic mixed precision level, used to save memory and accelerate training.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    model: 'squeezenet1_0'
    +num_classes: 1000
    +pretrained: False
    +ckpt_path: './squeezenet1_0_gpu.ckpt'
    +keep_checkpoint_max: 10
    +ckpt_save_dir: './ckpt/'
    +epoch_size: 200
    +dataset_sink_mode: True
    +amp_level: 'O0'
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --model squeezenet1_0 --num_classes 1000 --pretrained False \
    +    --ckpt_path ./squeezenet1_0_gpu.ckpt --keep_checkpoint_max 10 \
    +    --ckpt_save_path ./ckpt/ --epoch_size 200 --dataset_sink_mode True \
    +    --amp_level O0 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    network = create_model(model_name=args.model,
    +        num_classes=args.num_classes,
    +        in_channels=args.in_channels,
    +        drop_rate=args.drop_rate,
    +        drop_path_rate=args.drop_path_rate,
    +        pretrained=args.pretrained,
    +        checkpoint_path=args.ckpt_path,
    +        ema=args.ema
    +    )
    +    ...
    +
    +
  6. +
+

Loss Function

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    loss: name of loss function, BCE (BinaryCrossEntropy) or CE (CrossEntropy).

    +
  • +
  • +

    label_smoothing: label smoothing factor.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    loss: 'CE'
    +label_smoothing: 0.1
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --loss CE --label_smoothing 0.1 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    loss = create_loss(name=args.loss,
    +        reduction=args.reduction,
    +        label_smoothing=args.label_smoothing,
    +        aux_factor=args.aux_factor
    +     )
    +    ...
    +
    +
  6. +
+

Learning Rate Scheduler

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    scheduler: name of scheduler.

    +
  • +
  • +

    min_lr: the minimum learning rate, if the scheduler supports one.

    +
  • +
  • +

    lr: learning rate.

    +
  • +
  • +

    warmup_epochs: warmup epochs.

    +
  • +
  • +

    decay_epochs: decay epochs.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    scheduler: 'cosine_decay'
    +min_lr: 0.0
    +lr: 0.01
    +warmup_epochs: 0
    +decay_epochs: 200
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --scheduler cosine_decay --min_lr 0.0 --lr 0.01 \
    +    --warmup_epochs 0 --decay_epochs 200 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    lr_scheduler = create_scheduler(num_batches,
    +        scheduler=args.scheduler,
    +        lr=args.lr,
    +        min_lr=args.min_lr,
    +        warmup_epochs=args.warmup_epochs,
    +        warmup_factor=args.warmup_factor,
    +        decay_epochs=args.decay_epochs,
    +        decay_rate=args.decay_rate,
    +        milestones=args.multi_step_decay_milestones,
    +        num_epochs=args.epoch_size,
    +        lr_epoch_stair=args.lr_epoch_stair
    +    )
    +    ...
    +
    +
  6. +
+

Optimizer

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    opt: name of optimizer.

    +
  • +
  • +

    filter_bias_and_bn: whether to filter out bias and BatchNorm parameters.

    +
  • +
  • +

    momentum: Hyperparameter of type float; the momentum for the moving average.

    +
  • +
  • +

    weight_decay: weight decay (L2 penalty).

    +
  • +
  • +

    loss_scale: gradient scaling factor

    +
  • +
  • +

    use_nesterov: whether to enable Nesterov momentum.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    opt: 'momentum'
    +filter_bias_and_bn: True
    +momentum: 0.9
    +weight_decay: 0.00007
    +loss_scale: 1024
    +use_nesterov: False
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \
    +    --loss_scale 1024 --use_nesterov False ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    if args.ema:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            loss_scale=args.loss_scale,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    else:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    ...
    +
    +
  6. +
+

Combination of Yaml and Parse

+

You can override the parameter settings in the yaml file by passing the corresponding arguments on the command line. Take the following shell command as an example:

+
python train.py -c ./configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir ./data
+
+

The above command overwrites the value of the args.data_dir parameter from ./imagenet2012 (set in the yaml file) to ./data. A minimal sketch of this merge mechanism is shown below.
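For intuition only, here is a minimal sketch of how such yaml-plus-CLI merging can be implemented with PyYAML and argparse (this illustrates the idea and is not MindCV's actual config code):
+
+import argparse
+import yaml
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-c", "--config", type=str, default="")
+parser.add_argument("--data_dir", type=str, default="./imagenet2012")
+parser.add_argument("--mode", type=int, default=0)
+args = parser.parse_args()
+
+if args.config:
+    with open(args.config) as f:
+        cfg = yaml.safe_load(f)
+    parser.set_defaults(**cfg)   # yaml values become the new defaults ...
+    args = parser.parse_args()   # ... and explicit CLI flags still win on re-parse
+
+print(args.data_dir)  # "./data" when --data_dir ./data is passed on the command line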

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/deployment/index.html b/en/tutorials/deployment/index.html new file mode 100644 index 000000000..d8957cdbf --- /dev/null +++ b/en/tutorials/deployment/index.html @@ -0,0 +1,1290 @@ + + + + + + + + + + + + + + + + + + + + + + + + Deployment - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Inference Service Deployment

+

MindSpore Serving is a lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training on MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model.

+

This tutorial uses the mobilenet_v2_100 network as an example to describe how to deploy an inference service based on MindSpore Serving.

+

Environment Preparation

+

Before deploying, ensure that MindSpore Serving has been properly installed and the environment variables are configured. To install and configure MindSpore Serving on your PC, go to the MindSpore Serving installation page.

+

Exporting the Model

+

To implement cross-platform or cross-hardware inference (e.g., on an Ascend AI processor, the MindSpore device side, a GPU, etc.), a model file in MindIR format should be generated from the network definition and the checkpoint. In MindSpore, the function for exporting the network model is export, and its main parameters are as follows:

+
    +
  • net: MindSpore network structure.
  • inputs: Network input; the supported input type is Tensor. If there are multiple inputs, they should be passed together, for example, ms.export(network, ms.Tensor(input1), ms.Tensor(input2), file_name='network', file_format='MINDIR').
  • file_name: Name of the exported model file. If file_name doesn't contain the corresponding suffix (for example, .mindir), the system automatically adds one after file_format is set.
  • file_format: MindSpore currently supports 'AIR', 'ONNX' and 'MINDIR' formats for exported models.
+

The following code uses mobilenet_v2_100 as an example to export the pretrained network model of MindCV and obtain the model file in MindIR format.

+
from mindcv.models import create_model
+import numpy as np
+import mindspore as ms
+
+model = create_model(model_name='mobilenet_v2_100', num_classes=1000, pretrained=True)
+
+input_np = np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]).astype(np.float32)
+
+# Export mobilenet_v2_100.mindir to the current folder.
+ms.export(model, ms.Tensor(input_np), file_name='mobilenet_v2_100', file_format='MINDIR')
+
+

Deploying the Serving Inference Service

+

Configuring the Service

+

Start Serving with the following files:

+
demo
+├── mobilenet_v2_100
+│   ├── 1
+│   │   └── mobilenet_v2_100.mindir
+│   └── servable_config.py
+│── serving_server.py
+├── serving_client.py
+├── imagenet1000_clsidx_to_labels.txt
+└── test_image
+    ├─ dog
+    │   ├─ dog.jpg
+    │   └─ ……
+    └─ ……
+
+
    +
  • mobilenet_v2_100: Model folder. The folder name is the model name.
  • +
  • mobilenet_v2_100.mindir: Model file generated by the network in the previous step, which is stored in folder 1 (the number indicates the version number). Different versions are stored in different folders. The version number must be a string of digits. By default, the latest model file is started.
  • +
  • servable_config.py: Model configuration script. Declare the model and specify the input and output parameters of the model.
  • +
  • serving_server.py: Script to start the Serving server.
  • +
  • serving_client.py: Script to start the Python client.
  • +
  • imagenet1000_clsidx_to_labels.txt: Index of 1000 labels for the ImageNet dataset, available at examples/data/.
  • +
  • test_image: Test images, available at README.
  • +
+

Content of the configuration file servable_config.py:

+
from mindspore_serving.server import register
+
+# Declare the model. The parameter model_file indicates the name of the model file and model_format indicates the model type.
+model = register.declare_model(model_file="mobilenet_v2_100.mindir", model_format="MindIR")
+
+# The input parameters of the Servable method are specified by the input parameters of the Python method. The output parameters of the Servable method are specified by the output_names of register_method.
+@register.register_method(output_names=["score"])
+def predict(image):
+    x = register.add_stage(model, image, outputs_count=1)
+    return x
+
+

Starting the Service

+

The MindSpore Serving server can provide services through either gRPC or RESTful APIs. The following uses gRPC as an example. The startup script serving_server.py deploys the mobilenet_v2_100 model in the local directory to device 0 and starts a gRPC server listening on 127.0.0.1:5500. Content of the script:

+
import os
+import sys
+from mindspore_serving import server
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="mobilenet_v2_100",
+                                                 device_ids=0)
+    server.start_servables(servable_configs=servable_config)
+    server.start_grpc_server(address="127.0.0.1:5500")
+
+if __name__ == "__main__":
+    start()
+
+

If the following log information is displayed on the server, the gRPC service is started successfully.

+
Serving gRPC server start success, listening on 127.0.0.1:5500
+
+
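If RESTful access is preferred instead of gRPC, only the startup call changes. Below is a minimal sketch, assuming the start_restful_server interface of mindspore_serving.server (check the MindSpore Serving documentation for the exact signature):

+# In start(), replace (or add alongside) the gRPC line with a RESTful endpoint.
+server.start_restful_server(address="127.0.0.1:1500")
+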

Inference Execution

+

Start the Python client by using serving_client.py. The client script uses the create_transforms, create_dataset and create_loader functions of mindcv.data to preprocess the image and send it to the Serving server, then postprocesses the result returned by the server and prints the predicted label of the image.

+
import os
+from mindspore_serving.client import Client
+import numpy as np
+from mindcv.data import create_transforms, create_dataset, create_loader
+
+num_workers = 1
+
+# Dataset directory path
+data_dir = "./test_image/"
+
+dataset = create_dataset(root=data_dir, split='', num_parallel_workers=num_workers)
+transforms_list = create_transforms(dataset_name='ImageNet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+
+def postprocess(score):
+    max_idx = np.argmax(score)
+    return idx2label[max_idx]
+
+def predict():
+    client = Client("127.0.0.1:5500", "mobilenet_v2_100", "predict")
+    instances = []
+    images, _ = next(data_loader.create_tuple_iterator())
+    image_np = images.asnumpy().squeeze()
+    instances.append({"image": image_np})
+    result = client.infer(instances)
+
+    for instance in result:
+        label = postprocess(instance["score"])
+        print(label)
+
+if __name__ == '__main__':
+    predict()
+
+

If the following information is displayed, the Serving service has correctly executed inference with the mobilenet_v2_100 model:

Labrador retriever
+

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/finetune/index.html b/en/tutorials/finetune/index.html new file mode 100644 index 000000000..3c96dbf54 --- /dev/null +++ b/en/tutorials/finetune/index.html @@ -0,0 +1,1706 @@ + + + + + + + + + + + + + + + + + + + + + + + + Finetune - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Model Fine-Tuning Training

+

Download Notebook

+

In this tutorial, you will learn how to use MindCV for transfer learning to solve image classification problems on custom datasets. In deep learning tasks, we often face a shortage of training data, which makes it difficult to train an entire network from scratch to the desired accuracy. A better approach is to start from a model pretrained on a large dataset (close to the task data) and use it either to initialize the network's weights or as a fixed feature extractor for the specific task.

+

This tutorial will use the DenseNet model pretrained on ImageNet as an example to introduce two different fine-tuning strategies to solve the image classification problem of wolves and dogs in the case of small samples:

+
    +
  1. Overall model fine-tuning.
  2. Freeze backbone and only fine-tune the classifier.
+
+

For details of transfer learning, see Stanford University CS231n

+
+

Data Preparation

+

Download Dataset

+

Download the dog and wolf classification dataset used in this case. Each category has 120 training images and 30 validation images. Use the mindcv.utils.download interface to download the dataset; the downloaded archive is automatically extracted to the current directory.

+
import os
+from mindcv.utils.download import DownLoad
+
+dataset_url = "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip"
+root_dir = "./"
+
+if not os.path.exists(os.path.join(root_dir, 'data/Canidae')):
+    DownLoad().download_and_extract_archive(dataset_url, root_dir)
+
+

The directory structure of the dataset is as follows:

+
data/
+└── Canidae
+    ├── train
+    │   ├── dogs
+    │   └── wolves
+    └── val
+        ├── dogs
+        └── wolves
+
+

Dataset Loading and Processing

+

Loading Custom Datasets

+

By calling the create_dataset function in mindcv.data, we can easily load preset and customized datasets.

+
    +
  • When the parameter name is not set (the default), a user-defined dataset is loaded.
  • When the parameter name is set to MNIST, CIFAR10 or another standard dataset name, the corresponding preset dataset is loaded.
+

At the same time, we need to set the dataset path data_dir and the data split name split (such as train or val) to load the corresponding training or validation set.

+
from mindcv.data import create_dataset, create_transforms, create_loader
+
+num_workers = 8
+
+# path of dataset
+data_dir = "./data/Canidae/"
+
+# load dataset
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+
+
+

Note: The directory structure of the custom dataset should be the same as ImageNet, that is, the hierarchy root -> split -> class -> image

+
+
DATASET_NAME
+    ├── split1(e.g. train)/
+    │  ├── class1/
+    │  │   ├── 000001.jpg
+    │  │   ├── 000002.jpg
+    │  │   └── ....
+    │  └── class2/
+    │      ├── 000001.jpg
+    │      ├── 000002.jpg
+    │      └── ....
+    └── split2/
+       ├── class1/
+       │   ├── 000001.jpg
+       │   ├── 000002.jpg
+       │   └── ....
+       └── class2/
+           ├── 000001.jpg
+           ├── 000002.jpg
+           └── ....
+
+

Data Processing and Augmentation

+

First, we call the create_transforms function to obtain the preset data processing and augmentation strategy (transform list). In this task, because the file structure of the wolf-dog dataset is consistent with that of the ImageNet dataset, we specify the parameter dataset_name as ImageNet and directly use the preset ImageNet data processing and image augmentation strategy. create_transforms also supports a variety of customized processing and augmentation operations, as well as automatic augmentation policies (AutoAug). See the API description for details.

+

We then pass the obtained transform list to create_loader() and specify batch_size and other parameters to complete the preparation of the training and validation data; the returned Dataset object is used as the input of the model.

+
# Define and acquire data processing and augment operations
+trans_train = create_transforms(dataset_name='ImageNet', is_training=True)
+trans_val = create_transforms(dataset_name='ImageNet',is_training=False)
+
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+

Dataset Visualization

+

For the Dataset object returned by the create_loader interface, we can create a data iterator through the create_tuple_iterator interface and use next() to read one batch of data.

+
images, labels = next(loader_train.create_tuple_iterator())
+print("Tensor of image", images.shape)
+print("Labels:", labels)
+
+
Tensor of image (16, 3, 224, 224)
+Labels: [0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1]
+
+

Visualize the acquired images and labels; the title of each subplot is the label name corresponding to the image.

+
import matplotlib.pyplot as plt
+import numpy as np
+
+# class_name corresponds to label, and labels are marked in the order of folder string from small to large
+class_name = {0: "dogs", 1: "wolves"}
+
+plt.figure(figsize=(15, 7))
+for i in range(len(labels)):
+    # Get the image and its corresponding label
+    data_image = images[i].asnumpy()
+    data_label = labels[i]
+    # Process images for display
+    data_image = np.transpose(data_image, (1, 2, 0))
+    mean = np.array([0.485, 0.456, 0.406])
+    std = np.array([0.229, 0.224, 0.225])
+    data_image = std * data_image + mean
+    data_image = np.clip(data_image, 0, 1)
+    # Show Image
+    plt.subplot(3, 6, i + 1)
+    plt.imshow(data_image)
+    plt.title(class_name[int(labels[i].asnumpy())])
+    plt.axis("off")
+
+plt.show()
+
+

png

+

Model Fine-Tuning

+

1. Overall Model Fine-Tuning

+

Pretrained Model Loading

+

We use mindcv.models.densenet to define the DenseNet121 network. When the pretrained parameter in the interface is set to True, the network weights are downloaded automatically. Since the pretrained model was trained to classify the 1000 categories of the ImageNet dataset, we set num_classes=2 so that the output of DenseNet's classifier (the last FC layer) is adjusted to two dimensions. In this case, only the pretrained weights of the backbone are loaded, while the classifier uses its initial values.

+
from mindcv.models import create_model
+
+network = create_model(model_name='densenet121', num_classes=2, pretrained=True)
+
+
+

For the specific structure of DenseNet, see the DenseNet paper.

+
+

Model Training

+

Use the loaded and preprocessed wolf and dog images together with their labels to fine-tune the DenseNet network. Note that a smaller learning rate should be used when fine-tuning the whole model.

+
from mindcv.loss import create_loss
+from mindcv.optim import create_optimizer
+from mindcv.scheduler import create_scheduler
+from mindspore import Model, LossMonitor, TimeMonitor
+
+# Define optimizer and loss function
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-4)
+loss = create_loss(name='CE')
+
+# Instantiated model
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.5195528864860535
+epoch: 1 step: 10, loss is 0.2654373049736023
+epoch: 1 step: 15, loss is 0.28758567571640015
+Train epoch time: 17270.144 ms, per step time: 1151.343 ms
+epoch: 2 step: 5, loss is 0.1807008981704712
+epoch: 2 step: 10, loss is 0.1700802594423294
+epoch: 2 step: 15, loss is 0.09752683341503143
+Train epoch time: 1372.549 ms, per step time: 91.503 ms
+epoch: 3 step: 5, loss is 0.13594701886177063
+epoch: 3 step: 10, loss is 0.03628234937787056
+epoch: 3 step: 15, loss is 0.039737217128276825
+Train epoch time: 1453.237 ms, per step time: 96.882 ms
+epoch: 4 step: 5, loss is 0.014213413000106812
+epoch: 4 step: 10, loss is 0.030747078359127045
+epoch: 4 step: 15, loss is 0.0798817127943039
+Train epoch time: 1331.237 ms, per step time: 88.749 ms
+epoch: 5 step: 5, loss is 0.009510636329650879
+epoch: 5 step: 10, loss is 0.02603740245103836
+epoch: 5 step: 15, loss is 0.051846928894519806
+Train epoch time: 1312.737 ms, per step time: 87.516 ms
+epoch: 6 step: 5, loss is 0.1163717582821846
+epoch: 6 step: 10, loss is 0.02439398318529129
+epoch: 6 step: 15, loss is 0.02564268559217453
+Train epoch time: 1434.704 ms, per step time: 95.647 ms
+epoch: 7 step: 5, loss is 0.013310655951499939
+epoch: 7 step: 10, loss is 0.02289542555809021
+epoch: 7 step: 15, loss is 0.1992517113685608
+Train epoch time: 1275.935 ms, per step time: 85.062 ms
+epoch: 8 step: 5, loss is 0.015928998589515686
+epoch: 8 step: 10, loss is 0.011409260332584381
+epoch: 8 step: 15, loss is 0.008141174912452698
+Train epoch time: 1323.102 ms, per step time: 88.207 ms
+epoch: 9 step: 5, loss is 0.10395607352256775
+epoch: 9 step: 10, loss is 0.23055407404899597
+epoch: 9 step: 15, loss is 0.04896317049860954
+Train epoch time: 1261.067 ms, per step time: 84.071 ms
+epoch: 10 step: 5, loss is 0.03162381425499916
+epoch: 10 step: 10, loss is 0.13094250857830048
+epoch: 10 step: 15, loss is 0.020028553903102875
+Train epoch time: 1217.958 ms, per step time: 81.197 ms
+
+

Model Evaluation

+

After the training, we evaluate the accuracy of the model on the validation set.

+
res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

Visualize Model Inference Results

+

Define the visualize_model function and visualize the model predictions.

+
import matplotlib.pyplot as plt
+import mindspore as ms
+
+def visualize_model(model, val_dl, num_classes=2):
+    # Load the data of the validation set for validation
+    images, labels= next(val_dl.create_tuple_iterator())
+    # Predict image class
+    output = model.predict(images)
+    pred = np.argmax(output.asnumpy(), axis=1)
+    # Display images and their predicted values
+    images = images.asnumpy()
+    labels = labels.asnumpy()
+    class_name = {0: "dogs", 1: "wolves"}
+    plt.figure(figsize=(15, 7))
+    for i in range(len(labels)):
+        plt.subplot(3, 6, i + 1)
+        # If the prediction is correct, it is displayed in blue; If the prediction is wrong, it is displayed in red
+        color = 'blue' if pred[i] == labels[i] else 'red'
+        plt.title('predict:{}'.format(class_name[pred[i]]), color=color)
+        picture_show = np.transpose(images[i], (1, 2, 0))
+        mean = np.array([0.485, 0.456, 0.406])
+        std = np.array([0.229, 0.224, 0.225])
+        picture_show = std * picture_show + mean
+        picture_show = np.clip(picture_show, 0, 1)
+        plt.imshow(picture_show)
+        plt.axis('off')
+
+    plt.show()
+
+

Use the fine-tuned model to predict the wolf and dog images of the validation set. If the title is shown in blue, the prediction is correct; if it is shown in red, the prediction is wrong.

+
visualize_model(model, loader_val)
+
+

png

+

2. Freeze Backbone and Fine-Tune the Classifier

+

Freezing Backbone Parameters

+

First, we need to freeze all network layers except the final classifier layer, that is, set the requires_grad attribute of the corresponding parameters to False so that gradients are not computed and the parameters are not updated during backpropagation.

+

Because all models in mindcv.models use classifier to name the classification head of the model (i.e., the last Dense layer), the parameters of every layer outside the classifier can be filtered by excluding classifier.weight and classifier.bias, and their requires_grad attribute is set to False.

+
# freeze backbone
+for param in network.get_parameters():
+    if param.name not in ["classifier.weight", "classifier.bias"]:
+        param.requires_grad = False
+
+

Fine-Tune Classifier

+

Because the feature network is fixed, we don't have to worry about distorting the pretrained features during training. Therefore, compared with the first method, we can use a larger learning rate.

+

This also saves more than half of the training time compared with fine-tuning the whole network, because the gradients of the frozen layers no longer need to be computed.

+
# dataset load
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+
+# Define optimizer and loss function
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-3)
+loss = create_loss(name='CE')
+
+# Instantiated model
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.051333948969841
+epoch: 1 step: 10, loss is 0.02043312042951584
+epoch: 1 step: 15, loss is 0.16161368787288666
+Train epoch time: 10228.601 ms, per step time: 681.907 ms
+epoch: 2 step: 5, loss is 0.002121545374393463
+epoch: 2 step: 10, loss is 0.0009798109531402588
+epoch: 2 step: 15, loss is 0.015776708722114563
+Train epoch time: 562.543 ms, per step time: 37.503 ms
+epoch: 3 step: 5, loss is 0.008056879043579102
+epoch: 3 step: 10, loss is 0.0009347647428512573
+epoch: 3 step: 15, loss is 0.028648357838392258
+Train epoch time: 523.249 ms, per step time: 34.883 ms
+epoch: 4 step: 5, loss is 0.001014217734336853
+epoch: 4 step: 10, loss is 0.0003159046173095703
+epoch: 4 step: 15, loss is 0.0007699579000473022
+Train epoch time: 508.886 ms, per step time: 33.926 ms
+epoch: 5 step: 5, loss is 0.0015687644481658936
+epoch: 5 step: 10, loss is 0.012090332806110382
+epoch: 5 step: 15, loss is 0.004598274827003479
+Train epoch time: 507.243 ms, per step time: 33.816 ms
+epoch: 6 step: 5, loss is 0.010022152215242386
+epoch: 6 step: 10, loss is 0.0066385045647621155
+epoch: 6 step: 15, loss is 0.0036080628633499146
+Train epoch time: 517.646 ms, per step time: 34.510 ms
+epoch: 7 step: 5, loss is 0.01344013586640358
+epoch: 7 step: 10, loss is 0.0008538365364074707
+epoch: 7 step: 15, loss is 0.14135593175888062
+Train epoch time: 511.513 ms, per step time: 34.101 ms
+epoch: 8 step: 5, loss is 0.01626245677471161
+epoch: 8 step: 10, loss is 0.02871556021273136
+epoch: 8 step: 15, loss is 0.010110966861248016
+Train epoch time: 545.678 ms, per step time: 36.379 ms
+epoch: 9 step: 5, loss is 0.008498094975948334
+epoch: 9 step: 10, loss is 0.2588501274585724
+epoch: 9 step: 15, loss is 0.0014278888702392578
+Train epoch time: 499.243 ms, per step time: 33.283 ms
+epoch: 10 step: 5, loss is 0.021337147802114487
+epoch: 10 step: 10, loss is 0.00829876959323883
+epoch: 10 step: 15, loss is 0.008352771401405334
+Train epoch time: 465.600 ms, per step time: 31.040 ms
+
+

Model Evaluation

+

After the training, we evaluate the accuracy of the model on the validation set.

+
dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

Visualize Model Predictions

+

Use the fine-tuned model to predict the wolf and dog images of the validation set. If the title is shown in blue, the prediction is correct; if it is shown in red, the prediction is wrong.

+
visualize_model(model, loader_val)
+
+

png

+

The prediction results of wolf/dog after fine-tuning are correct.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/inference/index.html b/en/tutorials/inference/index.html new file mode 100644 index 000000000..af6dbb259 --- /dev/null +++ b/en/tutorials/inference/index.html @@ -0,0 +1,1288 @@ + + + + + + + + + + + + + + + + + + + + + + + + Inference - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Image Classification Prediction

+

Download Notebook

+

This tutorial introduces how to call a pretrained model in MindCV to make classification predictions on a test image.

+

Model Loading

+

View All Available Models

+

By calling the registry.list_models function in mindcv.models, the names of all network models can be printed. The variants of a network with different parameter configurations are also listed, such as resnet18 / resnet34 / resnet50 / resnet101 / resnet152.

+
import sys
+sys.path.append("..")
+from mindcv.models import registry
+registry.list_models()
+
+
['BiT_resnet50',
+ 'repmlp_b224',
+ 'repmlp_b256',
+ 'repmlp_d256',
+ 'repmlp_l256',
+ 'repmlp_t224',
+ 'repmlp_t256',
+ 'convit_base',
+ 'convit_base_plus',
+ 'convit_small',
+ ...
+ 'visformer_small',
+ 'visformer_small_v2',
+ 'visformer_tiny',
+ 'visformer_tiny_v2',
+ 'vit_b_16_224',
+ 'vit_b_16_384',
+ 'vit_b_32_224',
+ 'vit_b_32_384',
+ 'vit_l_16_224',
+ 'vit_l_16_384',
+ 'vit_l_32_224',
+ 'xception']
+
+
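Since the full list is long, it can also be narrowed down. The sketch below assumes that list_models accepts a wildcard filter and a pretrained flag, consistent with the usage mindcv.list_models("swin*", pretrained=True) shown elsewhere in these docs:

+# List only ResNet variants that have downloadable pretrained weights.
+registry.list_models("resnet*", pretrained=True)
+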

Load Pretrained Model

+

Taking the resnet50 model as an example, we introduce two methods to load the model checkpoint using the create_model function in mindcv.models.

+

1). When the pretrained parameter in the interface is set to True, network weights can be automatically downloaded.

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, pretrained=True)
+# Switch the execution logic of the network to the inference scenario
+model.set_train(False)
+
+
102453248B [00:16, 6092186.31B/s]
+
+ResNet<
+  (conv1): Conv2d<input_channels=3, output_channels=64, kernel_size=(7, 7), stride=(2, 2), pad_mode=pad, padding=3, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>
+  (bn1): BatchNorm2d<num_features=64, eps=1e-05, momentum=0.9, gamma=Parameter (name=bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True), beta=Parameter (name=bn1.beta, shape=(64,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=bn1.moving_mean, shape=(64,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=bn1.moving_variance, shape=(64,), dtype=Float32, requires_grad=False)>
+  (relu): ReLU<>
+  (max_pool): MaxPool2d<kernel_size=3, stride=2, pad_mode=SAME>
+  ...
+  (pool): GlobalAvgPooling<>
+  (classifier): Dense<input_channels=2048, output_channels=1000, has_bias=True>
+  >
+
+

2). When the checkpoint_path parameter in the interface is set to a file path, the model parameter file with the .ckpt suffix can be loaded.

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, checkpoint_path='./resnet50_224.ckpt')
+# Switch the execution logic of the network to the inference scenario
+model.set_train(False)
+
+

Data Preparation

+

Create Dataset

+

Here, we download a Wikipedia image as a test image, and use the create_dataset function in mindcv.data to construct a custom dataset for a single image.

+
from mindcv.data import create_dataset
+num_workers = 1
+# path of dataset
+data_dir = "./data/"
+dataset = create_dataset(root=data_dir, split='test', num_parallel_workers=num_workers)
+# Image visualization
+from PIL import Image
+Image.open("./data/test/dog/dog.jpg")
+
+

png

+

Data Preprocessing

+

Call the create_transforms function to obtain the data processing strategy (transform list) of the ImageNet dataset used by the pre-trained model.

+

We pass the obtained transform list into the create_loader function, specify batch_size=1 and other parameters, and then complete the preparation of test data. The Dataset object is returned as the input of the model.

+
from mindcv.data import create_transforms, create_loader
+transforms_list = create_transforms(dataset_name='imagenet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+
+

Model Inference

+

The image from the user-defined dataset is passed to the model to obtain the inference result. Here, the Squeeze operator of mindspore.ops is used to remove the batch dimension.

+
import mindspore.ops as P
+import numpy as np
+images, _ = next(data_loader.create_tuple_iterator())
+output = P.Squeeze()(model(images))
+pred = np.argmax(output.asnumpy())
+
+
with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+print('predict: {}'.format(idx2label[pred]))
+
+
predict: Labrador retriever
+
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/en/tutorials/quick_start/index.html b/en/tutorials/quick_start/index.html new file mode 100644 index 000000000..93225d439 --- /dev/null +++ b/en/tutorials/quick_start/index.html @@ -0,0 +1,1419 @@ + + + + + + + + + + + + + + + + + + + + + + + + Quick Start - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Quick Start

+

Download Notebook

+

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. +It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pretrained weights. +SoTA methods such as AutoAugment are also provided for performance improvement. +With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks. +In this tutorial, we will provide a quick start guideline for MindCV.

+

This tutorial will take the DenseNet classification model as an example to implement transfer learning on the CIFAR-10 dataset and explain the usage of MindCV modules in this process.

+

Environment Setting

+

See Installation for details.

+

Data

+

Dataset

+

Through the create_dataset module in mindcv.data, we can quickly load standard datasets or customized datasets.

+
import os
+from mindcv.data import create_dataset, create_transforms, create_loader
+
+cifar10_dir = './datasets/cifar/cifar-10-batches-bin'  # your dataset path
+num_classes = 10  # num of classes
+num_workers = 8  # num of parallel workers
+
+# create dataset
+dataset_train = create_dataset(
+    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers
+)
+
+

Transform

+

Through the create_transforms function, you can directly obtain the appropriate data processing and augmentation strategies (transform list) for standard datasets, including common strategies for CIFAR-10 and ImageNet.

+
# create transforms
+trans = create_transforms(dataset_name='cifar10', image_resize=224)
+
+

Loader

+

The mindcv.data.create_loader function is used for data conversion and batched loading. We need to pass in the transform list returned by create_transforms.

+
# Perform data augmentation operations to generate the required dataset.
+loader_train = create_loader(dataset=dataset_train,
+                             batch_size=64,
+                             is_training=True,
+                             num_classes=num_classes,
+                             transform=trans,
+                             num_parallel_workers=num_workers)
+
+num_batches = loader_train.get_dataset_size()
+
+
+

Avoid executing the create_loader cell repeatedly in the notebook; if you need to run it again, execute the create_dataset cell first.

+
+

Model

+

Use the create_model interface to obtain an instantiated DenseNet and load the pretrained weights (obtained by training on the ImageNet dataset).

+
from mindcv.models import create_model
+
+# instantiate the DenseNet121 model and load the pretraining weights.
+network = create_model(model_name='densenet121', num_classes=num_classes, pretrained=True)
+
+
+

Because CIFAR-10 and ImageNet require different numbers of classes, the classifier parameters cannot be shared; the resulting warning that the classifier parameters cannot be loaded does not affect fine-tuning.

+
+

Loss

+

Obtain the loss function through the create_loss interface.

+
from mindcv.loss import create_loss
+
+loss = create_loss(name='CE')
+
+

Learning Rate Scheduler

+

Use the create_scheduler interface to set up the learning rate scheduler.

+
from mindcv.scheduler import create_scheduler
+
+# learning rate scheduler
+lr_scheduler = create_scheduler(steps_per_epoch=num_batches,
+                                scheduler='constant',
+                                lr=0.0001)
+
+

Optimizer

+

Use the create_optimizer interface to create an optimizer.

+
from mindcv.optim import create_optimizer
+
+# create optimizer
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler)
+
+

Training

+

Use the mindspore.Model interface to encapsulate trainable instances according to the parameters passed in by the user.

+
from mindspore import Model
+
+# Encapsulates examples that can be trained or inferred
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+

Use the mindspore.Model.train interface for model training.

+
from mindspore import LossMonitor, TimeMonitor, CheckpointConfig, ModelCheckpoint
+
+# Set the callback function for saving network parameters during training.
+ckpt_save_dir = './ckpt'
+ckpt_config = CheckpointConfig(save_checkpoint_steps=num_batches)
+ckpt_cb = ModelCheckpoint(prefix='densenet121-cifar10',
+                          directory=ckpt_save_dir,
+                          config=ckpt_config)
+
+model.train(5, loader_train, callbacks=[LossMonitor(num_batches//5), TimeMonitor(num_batches//5), ckpt_cb], dataset_sink_mode=False)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:04:30.001.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op5273] don't support int64, reduce precision from int64 to int32.
+
+
+epoch: 1 step: 156, loss is 2.0816354751586914
+epoch: 1 step: 312, loss is 1.4474115371704102
+epoch: 1 step: 468, loss is 0.8935483694076538
+epoch: 1 step: 624, loss is 0.5588696002960205
+epoch: 1 step: 780, loss is 0.3161369860172272
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:09:20.261.851 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op16720] don't support int64, reduce precision from int64 to int32.
+
+
+Train epoch time: 416429.509 ms, per step time: 532.519 ms
+epoch: 2 step: 154, loss is 0.19752007722854614
+epoch: 2 step: 310, loss is 0.14635677635669708
+epoch: 2 step: 466, loss is 0.3511860966682434
+epoch: 2 step: 622, loss is 0.12542471289634705
+epoch: 2 step: 778, loss is 0.22351759672164917
+Train epoch time: 156746.872 ms, per step time: 200.444 ms
+epoch: 3 step: 152, loss is 0.08965137600898743
+epoch: 3 step: 308, loss is 0.22765043377876282
+epoch: 3 step: 464, loss is 0.19035443663597107
+epoch: 3 step: 620, loss is 0.06591956317424774
+epoch: 3 step: 776, loss is 0.0934530645608902
+Train epoch time: 156574.210 ms, per step time: 200.223 ms
+epoch: 4 step: 150, loss is 0.03782692924141884
+epoch: 4 step: 306, loss is 0.023876197636127472
+epoch: 4 step: 462, loss is 0.038690414279699326
+epoch: 4 step: 618, loss is 0.15388774871826172
+epoch: 4 step: 774, loss is 0.1581358164548874
+Train epoch time: 158398.108 ms, per step time: 202.555 ms
+epoch: 5 step: 148, loss is 0.06556802988052368
+epoch: 5 step: 304, loss is 0.006707251071929932
+epoch: 5 step: 460, loss is 0.02353120595216751
+epoch: 5 step: 616, loss is 0.014183484017848969
+epoch: 5 step: 772, loss is 0.09367241710424423
+Train epoch time: 154978.618 ms, per step time: 198.182 ms
+
+

Evaluation

+

Now, let's evaluate the trained model on the validation set of CIFAR-10.

+
# Load validation dataset
+dataset_val = create_dataset(
+    name='cifar10', root=cifar10_dir, split='test', shuffle=True, num_parallel_workers=num_workers
+)
+
+# Perform data augmentation operations to generate the required dataset.
+loader_val = create_loader(dataset=dataset_val,
+                           batch_size=64,
+                           is_training=False,
+                           num_classes=num_classes,
+                           transform=trans,
+                           num_parallel_workers=num_workers)
+
+

Load the fine-tuned parameter file (densenet121-cifar10-5_782.ckpt) into the model, for example as sketched below.

+
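A minimal sketch of this loading step (the checkpoint name follows the prefix and ckpt_save_dir configured above; adjust the path to your own run):

+import mindspore as ms
+
+# Load the parameters saved by ModelCheckpoint during training into the network.
+param_dict = ms.load_checkpoint('./ckpt/densenet121-cifar10-5_782.ckpt')
+ms.load_param_into_net(network, param_dict)
+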

Encapsulate an inferable instance according to the parameters passed in by the user, load the validation dataset, and verify the accuracy of the fine-tuned DenseNet121 model.

+
# Verify the accuracy of DenseNet121 after fine-tune
+acc = model.eval(loader_val, dataset_sink_mode=False)
+print(acc)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:24:11.927.472 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op24314] don't support int64, reduce precision from int64 to int32.
+
+
+{'accuracy': 0.951}
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:25:01.871.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op27139] don't support int64, reduce precision from int64 to int32.
+
+

Use YAML files for model training and validation

+

We can also use a yaml file with the model parameters preset to quickly train and validate a model through the train.py and validate.py scripts. The following is an example of training SqueezeNetV1 on ImageNet (you need to download ImageNet to the directory in advance).

+
+

For detailed tutorials, please refer to the tutorial.

+
+
# standalone training on a CPU/GPU/Ascend device
+python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --distribute False
+
+
python validate.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --ckpt_path /path/to/ckpt
+
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/gen_ref_pages.py b/gen_ref_pages.py new file mode 100644 index 000000000..9eeb4f9d3 --- /dev/null +++ b/gen_ref_pages.py @@ -0,0 +1,59 @@ +"""Generate the code reference pages of models.""" +import os +import sys + +sys.path.append(".") + +import importlib +import logging +from pathlib import Path + +_logger = logging.getLogger('mkdocs') +_langs = ["en", "zh"] + + +def _gen_page(lang): + full_doc_path = Path(f"docs/{lang}/reference/models.md") + _logger.info(f"Generating reference page: {full_doc_path}") + with open(full_doc_path, "w") as fd: + print("# Models", file=fd) + print("\n\n## Create Model", file=fd) + print("\n### ::: mindcv.models.model_factory.create_model", file=fd) + + for path in sorted(Path("mindcv/models").rglob("*.py")): + module_path = path.with_suffix("") # eg: mindcv/models/resnet + parts = list(module_path.parts) # eg: ["mindcv", "models", "resnet"] + if parts[-1].startswith("__") or parts[-2] == "layers": + continue + # fileter out utility modules + if parts[-1] in ["model_factory", "registry", "utils", "helpers"]: + continue + # filter out the net module which is replaced by the net function with the same name + # TODO: we need to change mechanism of model importing + if parts[-1] in ["googlenet", "inception_v3", "inception_v4", "xception", "pnasnet"]: + continue + + try: + print(f"\n\n## {parts[-1]}", file=fd) + identifier = ".".join(parts) # eg: mindcv.models.resnet + mod = importlib.import_module(identifier) + for mem in sorted(set(mod.__all__)): + print(f"\n### ::: {identifier}.{mem}", file=fd) + except Exception as err: + _logger.warning(f"Cannot generate reference of {identifier}, error: {err}.") + + +def _del_page(lang): + full_doc_path = Path(f"docs/{lang}/reference/models.md") + _logger.info(f"Cleaning generated reference page: {full_doc_path}") + os.remove(full_doc_path) + + +def on_startup(command, dirty): + for lang in _langs: + _gen_page(lang) + + +def on_shutdown(): + for lang in _langs: + _del_page(lang) diff --git a/how_to_guides/write_a_new_model/index.html b/how_to_guides/write_a_new_model/index.html new file mode 100644 index 000000000..19be1664c --- /dev/null +++ b/how_to_guides/write_a_new_model/index.html @@ -0,0 +1,1396 @@ + + + + + + + + + + + + + + + + + + + + + + + + Write A New Model - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Write A New Model

+

This document provides a reference template for writing a model definition file model.py in MindSpore, aiming at a unified code style.

+

Next, let's take MLP-Mixer as an example.

+

File Header

+

A brief description of the file, including the model name and the paper title, as follows:

+
"""
+MindSpore implementation of `${MODEL_NAME}`.
+Refer to ${PAPER_NAME}.
+"""
+
+

Module Import

+

There are three types of module imports, namely:

+
    +
  • Python built-in or third-party libraries, for example, import math and import numpy as np. They should be placed in the first group.
  • MindSpore-related modules, for example, import mindspore.nn as nn and import mindspore.ops as ops. They should be placed in the second group.
  • Modules in the MindCV package, for example, from .layers.classifier import ClassifierHead. They should be placed in the third group and use relative imports.
+

Examples are as follows:

+
import math
+from collections import OrderedDict
+
+import mindspore.nn as nn
+import mindspore.ops as ops
+import mindspore.common.initializer as init
+
+from .utils import load_pretrained
+from .layers.classifier import ClassifierHead
+
+

Only import necessary modules or packages to avoid importing useless packages.

+

__all__

+
+

Python has no native visibility control; visibility is maintained by a set of "conventions" that everyone should consciously follow. __all__ is such a convention for exposing a module's interface: it provides a "whitelist" of exported names. If __all__ is defined and another file imports this file with from xxx import *, only the members listed in __all__ are imported, and other members are excluded.

+
+

We agree that the exposed interfaces in the model include the main model class and functions that return models of different specifications, such as:

+
__all__ = [
+    "MLPMixer",
+    "mlp_mixer_s_p32",
+    "mlp_mixer_s_p16",
+    ...
+]
+
+

Here MLPMixer is the main model class, and mlp_mixer_s_p32 and mlp_mixer_s_p16 are functions that return models of different specifications. Generally speaking, a submodel, that is, a Layer or a Block, should not be used by other files; if it is, consider extracting it into ${MINDCLS}/models/layers as a common module, such as SEBlock.

+

Submodel

+

A deep model is a network composed of multiple layers. Some of these layers can form sub-models with the same topology, which we generally call a Layer or a Block, such as ResidualBlock. This kind of abstraction helps us understand the whole model structure and also simplifies code writing.

+

We should briefly describe the function of the sub-model in its class docstring. In MindSpore, a model class inherits from nn.Cell. Generally speaking, we need to override the following two methods:

+
    +
  • In the __init__ function, we define the neural network layers used in the model (the parameters of __init__ should be declared with type hints).
  • +
  • In the construct function, we define the model forward logic.
  • +
+

Examples are as follows:

+
class MixerBlock(nn.Cell):
+    """Mixer Layer with token-mixing MLP and channel-mixing MLP"""
+
+    def __init__(self,
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 dropout: float = 0.
+                 ) -> None:
+        super().__init__()
+        self.token_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            TransPose((0, 2, 1)),
+            FeedForward(n_patches, token_dim, dropout),
+            TransPose((0, 2, 1))
+        )
+        self.channel_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            FeedForward(n_channels, channel_dim, dropout),
+        )
+
+    def construct(self, x):
+        x = x + self.token_mix(x)
+        x = x + self.channel_mix(x)
+        return x
+
+

When writing an nn.Cell class, there are two noteworthy aspects, illustrated in the sketch after this list:

  • CellList & SequentialCell
      • CellList is just a container that holds a list of neural network layers (Cell). The Cells it contains are properly registered and are visible to all Cell methods, but we must implement the forward computation ourselves, that is, the construct function.
      • SequentialCell is a container that holds a sequential list of layers (Cell). The Cells may have names (OrderedDict) or not (List). We don't need to implement the forward computation; it is performed following the order of the list.
  • construct
      • Assert is not supported. [RuntimeError: ParseStatement] Unsupported statement 'Assert'.
      • Usage of single operators. When calling an operator (such as concat, reshape, mean), use the functional interface in mindspore.ops (such as output = ops.concat((x1, x2))) instead of instantiating the primitive operator (such as self.concat = ops.Concat()) in __init__ and then calling it in construct (output = self.concat((x1, x2))).
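The following illustrative sketch (not taken from MindCV) contrasts the two containers and shows the functional-operator style described above:

+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+class StackedDense(nn.Cell):
+    """Cells held in a CellList are registered, but construct must be written by hand."""
+
+    def __init__(self, depth: int = 3, dim: int = 16) -> None:
+        super().__init__()
+        self.blocks = nn.CellList([nn.Dense(dim, dim) for _ in range(depth)])
+
+    def construct(self, x):
+        for block in self.blocks:  # explicit forward logic over the registered Cells
+            x = block(x)
+        return x
+
+
+# SequentialCell already defines construct: the layers are executed in order.
+head = nn.SequentialCell(
+    nn.Dense(16, 16),
+    nn.ReLU(),
+    nn.Dense(16, 2),
+)
+
+
+class ConcatExample(nn.Cell):
+    """Prefer the functional interface over instantiating primitives in __init__."""
+
+    def construct(self, x1, x2):
+        return ops.concat((x1, x2), axis=1)  # instead of self.concat = ops.Concat(axis=1)
+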

Master Model

+

The main model is the network model definition proposed in the paper, which is composed of multiple sub-models. It is the top-level network suitable for classification, detection, and other tasks. It is basically similar to the submodel in code writing, but there are several differences.

+
    +
  • Class docstring. We should give the title and the link of the paper here. In addition, since this class is exposed externally, we had better also describe the class initialization parameters. See the code below.
  • forward_features function. Defines the feature-extraction part of the model.
  • forward_head function. Defines the classifier (head) operation of the model.
  • construct function. Calls the feature network and then the classifier.
  • _initialize_weights function. We agree that the random initialization of model parameters is done by this member function. See the code below.
+

Examples are as follows:

+
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (Union[int, tuple]) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        in_channels(int): number the channels of the input. Default: 3.
+        n_classes (int) : number of classification classes. Default: 1000.
+    """
+
+    def __init__(self,
+                 depth: int,
+                 patch_size: Union[int, tuple],
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 in_channels: int = 3,
+                 n_classes: int = 1000,
+                 ) -> None:
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, n_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        return ops.mean(x, 1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Conv2d):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                if m.beta is not None:
+                    m.beta.set_data(init.initializer(init.Constant(0.0001), m.beta.shape))
+            elif isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+
+

Specification Function

+

The model proposed in the paper may come in different specifications, such as different channel sizes, depths, and so on. The concrete configuration of these variants should be expressed through specification functions. The parameters of the specification interface, pretrained, num_classes and in_channels, should be named uniformly, and the loading of pretrained weights should be performed inside the specification function. Each specification function corresponds to one variant: it passes the configuration to the main model class through its arguments and returns the instantiated main model. In addition, you need to register each specification of the model in the package by adding the @register_model decorator.

+

Examples are as follows:

+
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+

Verify Main (Optional)

+

The initial writing phase should ensure that the model is operational. The following code blocks can be used for basic verification:

+
if __name__ == '__main__':
+    import numpy as np
+    import mindspore
+    from mindspore import Tensor
+
+    model = mlp_mixer_s_p16()
+    print(model)
+    dummy_input = Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)
+    y = model(dummy_input)
+    print(y.shape)
+
+

Reference Example

+
    +
  • densenet.py
  • +
  • shufflenetv1.py
  • +
  • shufflenetv2.py
  • +
  • mixnet.py
  • +
  • mlp_mixer.py
  • +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 000000000..54b419bea --- /dev/null +++ b/index.html @@ -0,0 +1,1499 @@ + + + + + + + + + + + + + + + + + + + + + + Home - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +
+

MindCV

+

CI +PyPI - Python Version +PyPI +docs +license +open issues +PRs +Code style: black +Imports: isort +pre-commit

+
+

Introduction

+

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pre-trained weights and training strategies. SoTA methods such as auto augmentation are also provided for performance improvement. With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks.

+

Major Features

+
    +
  • +

    Easy-to-Use. MindCV decomposes the vision framework into various configurable components. It is easy to customize your data pipeline, models, and learning pipeline with MindCV:

    +
    >>> import mindcv
    +# create a dataset
    +>>> dataset = mindcv.create_dataset('cifar10', download=True)
    +# create a model
    +>>> network = mindcv.create_model('resnet50', pretrained=True)
    +
    +

    Users can customize and launch their transfer learning or training task in one command line.

    +
    # transfer learning in one command line
    +python train.py --model=swin_tiny --pretrained --opt=adamw --lr=0.001 --data_dir=/path/to/data
    +
    +
  • +
  • +

    State-of-The-Art. MindCV provides various CNN-based and Transformer-based vision models including SwinTransformer. Their pretrained weights and performance reports are provided to help users select and reuse the right model:

    +
  • +
  • +

    Flexibility and efficiency. MindCV is built on MindSpore which is an efficient DL framework that can be run on different hardware platforms (GPU/CPU/Ascend). It supports both graph mode for high efficiency and pynative mode for flexibility.

    +
  • +
+

Model Zoo

+

The performance of the models trained with MindCV is summarized here, where the training recipes and weights are both available.

+

Model introduction and training details can be viewed in each sub-folder under configs.

+

Installation

+

See Installation for details.

+

Getting Started

+

Hands-on Tutorial

+

To get started with MindCV, please see the Quick Start, which will give you a quick tour of each key component and the train/validate/predict pipelines.

+

Below are a few code snippets for your taste.

+
>>> import mindcv
+# List and find a pretrained vision model
+>>> mindcv.list_models("swin*", pretrained=True)
+['swin_tiny']
+# Create the model object
+>>> network = mindcv.create_model('swin_tiny', pretrained=True)
+# Validate its accuracy
+>>> !python validate.py --model=swin_tiny --pretrained --dataset=imagenet --val_split=validation
+{'Top_1_Accuracy': 0.80824, 'Top_5_Accuracy': 0.94802, 'loss': 1.7331367141008378}
+
+
+Image Classification Demo +

Right click on the image below and save as dog.jpg.

+

+ +

+

Classify the downloaded image with a pretrained SoTA model:

+
>>> !python infer.py --model=swin_tiny --image_path='./dog.jpg'
+{'Labrador retriever': 0.5700152, 'golden retriever': 0.034551315, 'kelpie': 0.010108651, 'Chesapeake Bay retriever': 0.008229004, 'Walker hound, Walker foxhound': 0.007791956}
+
+

The top-1 prediction result is labrador retriever, which is the breed of this cute dog.

+
+

Training

+

It is easy to train your model on a standard or customized dataset using train.py, where the training strategy (e.g., augmentation, LR scheduling) can be configured with external arguments or a yaml config file.

+
    +
  • +

    Standalone Training

    +
    # standalone training
    +python train.py --model=resnet50 --dataset=cifar10 --dataset_download
    +
    +

    Above is an example of training ResNet50 on the CIFAR-10 dataset on a CPU/GPU/Ascend device.

    +
  • +
  • +

    Distributed Training

    +

    For large datasets like ImageNet, it is necessary to do training in distributed mode on multiple devices. This can be achieved with mpirun and parallel features supported by MindSpore.

    +
    # distributed training
    +# assume you have 4 GPUs/NPUs
    +mpirun -n 4 python train.py --distribute \
    +    --model=densenet121 --dataset=imagenet --data_dir=/path/to/imagenet
    +
    +
    +

    Notes: If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

    +
    +

    Detailed parameter definitions can be seen in config.py and checked by running `python train.py --help`.

    +

    To resume training, please set the --ckpt_path and --ckpt_save_dir arguments. The optimizer state including the learning rate of the last stopped epoch will also be recovered.

    +
  • +
  • +

    Config and Training Strategy

    You can configure your model and other components either by specifying external parameters or by writing a yaml config file. Here is an example of training with a preset yaml file:

    mpirun --allow-run-as-root -n 4 python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml

    Pre-defined Training Strategies

    We currently provide more than 20 training recipes that achieve SoTA results on ImageNet. Please look into the configs folder for details. Feel free to adapt these training strategies to your own model for performance improvement, which can easily be done by modifying the yaml file.

    Train on ModelArts/OpenI Platform

    To run training on the ModelArts or OpenI cloud platform:

    1. Create a new training task on the cloud platform.
    2. Add the parameter `config` and specify the path to the yaml config file in the website UI.
    3. Add the parameter `enable_modelarts` and set it to True in the website UI.
    4. Fill in the other fields on the website and launch the training task.

Graph Mode and PyNative Mode

By default, the training pipeline train.py runs in graph mode on MindSpore, which is optimized for efficiency and parallel computing with a compiled static graph. In contrast, pynative mode is optimized for flexibility and easy debugging. You may alter the parameter --mode to switch to pure pynative mode for debugging purposes.
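As a hedged illustration (the integer values follow MindSpore's usual convention, where 0 is graph mode and 1 is pynative mode; please double-check config.py for the exact meaning in your version):

# assumed example: run the training pipeline in pynative mode for debugging
python train.py --model=resnet50 --dataset=cifar10 --dataset_download --mode=1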

Mixed Mode

PyNative mode with mindspore.jit is a mixed mode that combines flexibility with efficiency in MindSpore. To apply pynative mode with mindspore.jit for training, please run train_with_func.py, e.g.,

python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10

Note: this is an experimental feature under active improvement. It is not stable on MindSpore 1.8.1 or earlier versions.
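For context, the sketch below shows the general pattern that train_with_func.py follows, assuming MindSpore >= 2.0 where the mindspore.jit decorator is available; it is a minimal illustration, not the script's actual code.

import mindspore as ms
from mindspore import nn

net = nn.Dense(10, 2)
loss_fn = nn.CrossEntropyLoss()

@ms.jit  # this function is compiled into a graph; code outside it stays pynative
def forward_fn(data, label):
    logits = net(data)
    return loss_fn(logits, label)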

Validation

To evaluate the model performance, please run validate.py:

# validate a trained checkpoint
python validate.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/data --ckpt_path=/path/to/model.ckpt

Validation while Training

You can also track the validation accuracy during training by enabling the --val_while_train option:

python train.py --model=resnet50 --dataset=cifar10 \
    --val_while_train --val_split=test --val_interval=1

The training loss and validation accuracy for each epoch will be saved in ${ckpt_save_dir}/results.log.

More examples of training and validation can be found in examples.

Tutorials

We provide the following Jupyter notebook tutorials to help users learn to use MindCV.

Supported Algorithms

  • Augmentation
  • Optimizer
      • Adam
      • AdamW
      • Lion
      • Adan (experimental)
      • AdaGrad
      • LAMB
      • Momentum
      • RMSProp
      • SGD
      • NAdam
  • LR Scheduler
      • Warmup Cosine Decay
      • Step LR
      • Polynomial Decay
      • Exponential Decay
  • Regularization
      • Weight Decay
      • Label Smoothing
      • Stochastic Depth (depends on networks)
      • Dropout (depends on networks)
  • Loss
      • Cross Entropy (w/ class weight and auxiliary logit support)
      • Binary Cross Entropy (w/ class weight and auxiliary logit support)
      • Soft Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
      • Soft Binary Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
  • Ensemble
      • Warmup EMA (Exponential Moving Average)
How to Contribute

We appreciate all kinds of contributions, including issues and PRs, to make MindCV better.

Please refer to CONTRIBUTING for the contributing guideline. Please follow the Model Template and Guideline when contributing a model that fits the overall interface :)

License

This project is released under the Apache License 2.0.

Acknowledgement

MindCV is an open-source project jointly developed by the MindSpore team, Xidian University, and Xi'an Jiaotong University. Sincere thanks to all participating researchers and developers for their hard work on this project. We also acknowledge the computing resources provided by OpenI.

Citation

If you find this project useful in your research, please consider citing:

@misc{MindSpore Computer Vision 2022,
    title={{MindSpore Computer Vision}: MindSpore Computer Vision Toolbox and Benchmark},
    author={MindSpore Vision Contributors},
    howpublished = {\url{https://github.com/mindspore-lab/mindcv/}},
    year={2022}
}

Installation


Dependency

  • mindspore >= 1.8.1
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • tqdm
  • openmpi 4.0.3 (for distributed mode)

To install the Python library dependencies, just run:

pip install -r requirements.txt

Tip

MindSpore can be easily installed by following the official instructions, where you can select your hardware platform for the best fit. To run in distributed mode, OpenMPI is required.

The following instructions assume that all desired dependencies are installed.

Install with PyPI

MindCV is published as a Python package and can be installed with pip, ideally in a virtual environment. Open up a terminal and install MindCV with:

pip install mindcv

# working on it using test.pypi

This will automatically install compatible versions of the dependencies: NumPy, PyYAML and tqdm.

Tip

If you don't have prior experience with Python, we recommend reading Using Python's pip to Manage Your Projects' Dependencies, which is a really good introduction to the mechanics of Python package management and helps you troubleshoot if you run into errors.

Warning

The above command will NOT install MindSpore. We highly recommend that you install MindSpore following the official instructions.

Install from Source (Bleeding Edge Version)


From VCS

pip install git+https://github.com/mindspore-lab/mindcv.git

From local source

Tip

As this project is under active development, if you are a developer or contributor, please prefer this installation!

MindCV can be used directly from GitHub by cloning the repository into a local folder, which is useful if you want the very latest version:

git clone https://github.com/mindspore-lab/mindcv.git

After cloning from git, it is recommended that you install in "editable" mode, which helps resolve potential module import issues:

cd mindcv
pip install -e .

Model Zoo

| Model | Context | Top-1 (%) | Top-5 (%) | Params (M) | Recipe | Download |
|-------|---------|-----------|-----------|------------|--------|----------|
| BiT_resnet50 | D910x8-G | 76.81 | 93.17 | 25.55 | yaml | weights |
| BiT_resnet50x3 | D910x8-G | 80.63 | 95.12 | 217.31 | yaml | weights |
| BiT_resnet101 | D910x8-G | 77.93 | 93.75 | 44.54 | yaml | weights |
| coat_lite_tiny | D910x8-G | 77.35 | 93.43 | 5.72 | yaml | weights |
| coat_lite_mini | D910x8-G | 78.51 | 93.84 | 11.01 | yaml | weights |
| coat_tiny | D910x8-G | 79.67 | 94.88 | 5.50 | yaml | weights |
| coat_mini | D910x8-G | 81.08 | 95.34 | 10.34 | yaml | weights |
| convit_tiny | D910x8-G | 73.66 | 91.72 | 5.71 | yaml | weights |
| convit_tiny_plus | D910x8-G | 77.00 | 93.60 | 9.97 | yaml | weights |
| convit_small | D910x8-G | 81.63 | 95.59 | 27.78 | yaml | weights |
| convit_small_plus | D910x8-G | 81.80 | 95.42 | 48.98 | yaml | weights |
| convit_base | D910x8-G | 82.10 | 95.52 | 86.54 | yaml | weights |
| convit_base_plus | D910x8-G | 81.96 | 95.04 | 153.13 | yaml | weights |
| convnext_tiny | D910x64-G | 81.91 | 95.79 | 28.59 | yaml | weights |
| convnext_small | D910x64-G | 83.40 | 96.36 | 50.22 | yaml | weights |
| convnext_base | D910x64-G | 83.32 | 96.24 | 88.59 | yaml | weights |
| convnextv2_tiny | D910x8-G | 82.43 | 95.98 | 28.64 | yaml | weights |
| crossvit_9 | D910x8-G | 73.56 | 91.79 | 8.55 | yaml | weights |
| crossvit_15 | D910x8-G | 81.08 | 95.33 | 27.27 | yaml | weights |
| crossvit_18 | D910x8-G | 81.93 | 95.75 | 43.27 | yaml | weights |
| densenet121 | D910x8-G | 75.64 | 92.84 | 8.06 | yaml | weights |
| densenet161 | D910x8-G | 79.09 | 94.66 | 28.90 | yaml | weights |
| densenet169 | D910x8-G | 77.26 | 93.71 | 14.31 | yaml | weights |
| densenet201 | D910x8-G | 78.14 | 94.08 | 20.24 | yaml | weights |
| dpn92 | D910x8-G | 79.46 | 94.49 | 37.79 | yaml | weights |
| dpn98 | D910x8-G | 79.94 | 94.57 | 61.74 | yaml | weights |
| dpn107 | D910x8-G | 80.05 | 94.74 | 87.13 | yaml | weights |
| dpn131 | D910x8-G | 80.07 | 94.72 | 79.48 | yaml | weights |
| edgenext_xx_small | D910x8-G | 71.02 | 89.99 | 1.33 | yaml | weights |
| edgenext_x_small | D910x8-G | 75.14 | 92.50 | 2.34 | yaml | weights |
| edgenext_small | D910x8-G | 79.15 | 94.39 | 5.59 | yaml | weights |
| edgenext_base | D910x8-G | 82.24 | 95.94 | 18.51 | yaml | weights |
| efficientnet_b0 | D910x64-G | 76.89 | 93.16 | 5.33 | yaml | weights |
| efficientnet_b1 | D910x64-G | 78.95 | 94.34 | 7.86 | yaml | weights |
| ghostnet_050 | D910x8-G | 66.03 | 86.64 | 2.60 | yaml | weights |
| ghostnet_100 | D910x8-G | 73.78 | 91.66 | 5.20 | yaml | weights |
| ghostnet_130 | D910x8-G | 75.50 | 92.56 | 7.39 | yaml | weights |
| googlenet | D910x8-G | 72.68 | 90.89 | 6.99 | yaml | weights |
| hrnet_w32 | D910x8-G | 80.64 | 95.44 | 41.30 | yaml | weights |
| hrnet_w48 | D910x8-G | 81.19 | 95.69 | 77.57 | yaml | weights |
| inception_v3 | D910x8-G | 79.11 | 94.40 | 27.20 | yaml | weights |
| inception_v4 | D910x8-G | 80.88 | 95.34 | 42.74 | yaml | weights |
| mixnet_s | D910x8-G | 75.52 | 92.52 | 4.17 | yaml | weights |
| mixnet_m | D910x8-G | 76.64 | 93.05 | 5.06 | yaml | weights |
| mixnet_l | D910x8-G | 78.73 | 94.31 | 7.38 | yaml | weights |
| mnasnet_050 | D910x8-G | 68.07 | 88.09 | 2.14 | yaml | weights |
| mnasnet_075 | D910x8-G | 71.81 | 90.53 | 3.20 | yaml | weights |
| mnasnet_100 | D910x8-G | 74.28 | 91.70 | 4.42 | yaml | weights |
| mnasnet_130 | D910x8-G | 75.65 | 92.64 | 6.33 | yaml | weights |
| mnasnet_140 | D910x8-G | 76.01 | 92.83 | 7.16 | yaml | weights |
| mobilenet_v1_025 | D910x8-G | 53.87 | 77.66 | 0.47 | yaml | weights |
| mobilenet_v1_050 | D910x8-G | 65.94 | 86.51 | 1.34 | yaml | weights |
| mobilenet_v1_075 | D910x8-G | 70.44 | 89.49 | 2.60 | yaml | weights |
| mobilenet_v1_100 | D910x8-G | 72.95 | 91.01 | 4.25 | yaml | weights |
| mobilenet_v2_075 | D910x8-G | 69.98 | 89.32 | 2.66 | yaml | weights |
| mobilenet_v2_100 | D910x8-G | 72.27 | 90.72 | 3.54 | yaml | weights |
| mobilenet_v2_140 | D910x8-G | 75.56 | 92.56 | 6.15 | yaml | weights |
| mobilenet_v3_small_100 | D910x8-G | 68.10 | 87.86 | 2.55 | yaml | weights |
| mobilenet_v3_large_100 | D910x8-G | 75.23 | 92.31 | 5.51 | yaml | weights |
| mobilevit_xx_small | D910x8-G | 68.91 | 88.91 | 1.27 | yaml | weights |
| mobilevit_x_small | D910x8-G | 74.99 | 92.32 | 2.32 | yaml | weights |
| mobilevit_small | D910x8-G | 78.47 | 94.18 | 5.59 | yaml | weights |
| nasnet_a_4x1056 | D910x8-G | 73.65 | 91.25 | 5.33 | yaml | weights |
| pit_ti | D910x8-G | 72.96 | 91.33 | 4.85 | yaml | weights |
| pit_xs | D910x8-G | 78.41 | 94.06 | 10.61 | yaml | weights |
| pit_s | D910x8-G | 80.56 | 94.80 | 23.46 | yaml | weights |
| pit_b | D910x8-G | 81.87 | 95.04 | 73.76 | yaml | weights |
| poolformer_s12 | D910x8-G | 77.33 | 93.34 | 11.92 | yaml | weights |
| pvt_tiny | D910x8-G | 74.81 | 92.18 | 13.23 | yaml | weights |
| pvt_small | D910x8-G | 79.66 | 94.71 | 24.49 | yaml | weights |
| pvt_medium | D910x8-G | 81.82 | 95.81 | 44.21 | yaml | weights |
| pvt_large | D910x8-G | 81.75 | 95.70 | 61.36 | yaml | weights |
| pvt_v2_b0 | D910x8-G | 71.50 | 90.60 | 3.67 | yaml | weights |
| pvt_v2_b1 | D910x8-G | 78.91 | 94.49 | 14.01 | yaml | weights |
| pvt_v2_b2 | D910x8-G | 81.99 | 95.74 | 25.35 | yaml | weights |
| pvt_v2_b3 | D910x8-G | 82.84 | 96.24 | 45.24 | yaml | weights |
| pvt_v2_b4 | D910x8-G | 83.14 | 96.27 | 62.56 | yaml | weights |
| regnet_x_200mf | D910x8-G | 68.74 | 88.38 | 2.68 | yaml | weights |
| regnet_x_400mf | D910x8-G | 73.16 | 91.35 | 5.16 | yaml | weights |
| regnet_x_600mf | D910x8-G | 74.34 | 92.00 | 6.20 | yaml | weights |
| regnet_x_800mf | D910x8-G | 76.04 | 92.97 | 7.26 | yaml | weights |
| regnet_y_200mf | D910x8-G | 70.30 | 89.61 | 3.16 | yaml | weights |
| regnet_y_400mf | D910x8-G | 73.91 | 91.84 | 4.34 | yaml | weights |
| regnet_y_600mf | D910x8-G | 75.69 | 92.50 | 6.06 | yaml | weights |
| regnet_y_800mf | D910x8-G | 76.52 | 93.10 | 6.26 | yaml | weights |
| regnet_y_16gf | D910x8-G | 82.92 | 96.29 | 83.71 | yaml | weights |
| repmlp_t224 | D910x8-G | 76.71 | 93.30 | 38.30 | yaml | weights |
| repvgg_a0 | D910x8-G | 72.19 | 90.75 | 9.13 | yaml | weights |
| repvgg_a1 | D910x8-G | 74.19 | 91.89 | 14.12 | yaml | weights |
| repvgg_a2 | D910x8-G | 76.63 | 93.42 | 28.25 | yaml | weights |
| repvgg_b0 | D910x8-G | 74.99 | 92.40 | 15.85 | yaml | weights |
| repvgg_b1 | D910x8-G | 78.81 | 94.37 | 57.48 | yaml | weights |
| repvgg_b2 | D910x64-G | 79.29 | 94.66 | 89.11 | yaml | weights |
| repvgg_b3 | D910x64-G | 80.46 | 95.34 | 123.19 | yaml | weights |
| repvgg_b1g2 | D910x8-G | 78.03 | 94.09 | 45.85 | yaml | weights |
| repvgg_b1g4 | D910x8-G | 77.64 | 94.03 | 40.03 | yaml | weights |
| repvgg_b2g4 | D910x8-G | 78.8 | 94.36 | 61.84 | yaml | weights |
| res2net50 | D910x8-G | 79.35 | 94.64 | 25.76 | yaml | weights |
| res2net101 | D910x8-G | 79.56 | 94.70 | 45.33 | yaml | weights |
| res2net50_v1b | D910x8-G | 80.32 | 95.09 | 25.77 | yaml | weights |
| res2net101_v1b | D910x8-G | 81.14 | 95.41 | 45.35 | yaml | weights |
| resnest50 | D910x8-G | 80.81 | 95.16 | 27.55 | yaml | weights |
| resnest101 | D910x8-G | 82.90 | 96.12 | 48.41 | yaml | weights |
| resnet18 | D910x8-G | 70.21 | 89.62 | 11.70 | yaml | weights |
| resnet34 | D910x8-G | 74.15 | 91.98 | 21.81 | yaml | weights |
| resnet50 | D910x8-G | 76.69 | 93.50 | 25.61 | yaml | weights |
| resnet101 | D910x8-G | 78.24 | 94.09 | 44.65 | yaml | weights |
| resnet152 | D910x8-G | 78.72 | 94.45 | 60.34 | yaml | weights |
| resnetv2_50 | D910x8-G | 76.90 | 93.37 | 25.60 | yaml | weights |
| resnetv2_101 | D910x8-G | 78.48 | 94.23 | 44.55 | yaml | weights |
| resnext50_32x4d | D910x8-G | 78.53 | 94.10 | 25.10 | yaml | weights |
| resnext101_32x4d | D910x8-G | 79.83 | 94.80 | 44.32 | yaml | weights |
| resnext101_64x4d | D910x8-G | 80.30 | 94.82 | 83.66 | yaml | weights |
| resnext152_64x4d | D910x8-G | 80.52 | 95.00 | 115.27 | yaml | weights |
| rexnet_09 | D910x8-G | 77.06 | 93.41 | 4.13 | yaml | weights |
| rexnet_10 | D910x8-G | 77.38 | 93.60 | 4.84 | yaml | weights |
| rexnet_13 | D910x8-G | 79.06 | 94.28 | 7.61 | yaml | weights |
| rexnet_15 | D910x8-G | 79.95 | 94.74 | 9.79 | yaml | weights |
| rexnet_20 | D910x8-G | 80.64 | 94.99 | 16.45 | yaml | weights |
| seresnet18 | D910x8-G | 71.81 | 90.49 | 11.80 | yaml | weights |
| seresnet34 | D910x8-G | 75.38 | 92.50 | 21.98 | yaml | weights |
| seresnet50 | D910x8-G | 78.32 | 94.07 | 28.14 | yaml | weights |
| seresnext26_32x4d | D910x8-G | 77.17 | 93.42 | 16.83 | yaml | weights |
| seresnext50_32x4d | D910x8-G | 78.71 | 94.36 | 27.63 | yaml | weights |
| shufflenet_v1_g3_05 | D910x8-G | 57.05 | 79.73 | 0.73 | yaml | weights |
| shufflenet_v1_g3_10 | D910x8-G | 67.77 | 87.73 | 1.89 | yaml | weights |
| shufflenet_v2_x0_5 | D910x8-G | 60.53 | 82.11 | 1.37 | yaml | weights |
| shufflenet_v2_x1_0 | D910x8-G | 69.47 | 88.88 | 2.29 | yaml | weights |
| shufflenet_v2_x1_5 | D910x8-G | 72.79 | 90.93 | 3.53 | yaml | weights |
| shufflenet_v2_x2_0 | D910x8-G | 75.07 | 92.08 | 7.44 | yaml | weights |
| skresnet18 | D910x8-G | 73.09 | 91.20 | 11.97 | yaml | weights |
| skresnet34 | D910x8-G | 76.71 | 93.10 | 22.31 | yaml | weights |
| skresnext50_32x4d | D910x8-G | 79.08 | 94.60 | 37.31 | yaml | weights |
| squeezenet1_0 | D910x8-G | 59.01 | 81.01 | 1.25 | yaml | weights |
| squeezenet1_0 | GPUx8-G | 58.83 | 81.08 | 1.25 | yaml | weights |
| squeezenet1_1 | D910x8-G | 58.44 | 80.84 | 1.24 | yaml | weights |
| squeezenet1_1 | GPUx8-G | 59.18 | 81.41 | 1.24 | yaml | weights |
| swin_tiny | D910x8-G | 80.82 | 94.80 | 33.38 | yaml | weights |
| swinv2_tiny_window8 | D910x8-G | 81.42 | 95.43 | 28.78 | yaml | weights |
| vgg11 | D910x8-G | 71.86 | 90.50 | 132.86 | yaml | weights |
| vgg13 | D910x8-G | 72.87 | 91.02 | 133.04 | yaml | weights |
| vgg16 | D910x8-G | 74.61 | 91.87 | 138.35 | yaml | weights |
| vgg19 | D910x8-G | 75.21 | 92.56 | 143.66 | yaml | weights |
| visformer_tiny | D910x8-G | 78.28 | 94.15 | 10.33 | yaml | weights |
| visformer_tiny_v2 | D910x8-G | 78.82 | 94.41 | 9.38 | yaml | weights |
| visformer_small | D910x8-G | 81.76 | 95.88 | 40.25 | yaml | weights |
| visformer_small_v2 | D910x8-G | 82.17 | 95.90 | 23.52 | yaml | weights |
| vit_b_32_224 | D910x8-G | 75.86 | 92.08 | 87.46 | yaml | weights |
| vit_l_16_224 | D910x8-G | 76.34 | 92.79 | 303.31 | yaml | weights |
| vit_l_32_224 | D910x8-G | 73.71 | 90.92 | 305.52 | yaml | weights |
| volo_d1 | D910x8-G | 82.59 | 95.99 | 27 | yaml | weights |
| xception | D910x8-G | 79.01 | 94.25 | 22.91 | yaml | weights |
| xcit_tiny_12_p16_224 | D910x8-G | 77.67 | 93.79 | 7.00 | yaml | weights |

Notes

  • Context: training context denoted as {device}x{pieces}-{MS mode}, where the MindSpore mode can be G (graph mode) or F (pynative mode with ms function). For example, D910x8-G means training on 8 Ascend 910 NPUs in graph mode.
  • Top-1 and Top-5: accuracy reported on the validation set of ImageNet-1K.

Change Log


Coming soon.


Code of Conduct


Coming soon.


MindCV Contributing Guidelines

+

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

+

Contributor License Agreement

+

It is required to sign the CLA before your first code submission to the MindCV community.

+

For individual contributors, please refer to the ICLA online document for detailed information.

+

Types of Contributions

+

Report Bugs

+

Report bugs at https://github.com/mindspore-lab/mindcv/issues.

+

If you are reporting a bug, please include:

+
  • Your operating system name and version.
  • Any details about your local setup that might be helpful in troubleshooting.
  • Detailed steps to reproduce the bug.

Fix Bugs

+

Look through the GitHub issues for bugs. Anything tagged with "bug" and "help wanted" is open to whoever wants to implement it.

+

Implement Features

+

Look through the GitHub issues for features. Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it.

+

Write Documentation

+

MindCV could always use more documentation, whether as part of the official MindCV docs, in docstrings, or even on the web in blog posts, articles, and such.

+

Submit Feedback

+

The best way to send feedback is to file an issue at https://github.com/mindspore-lab/mindcv/issues.

+

If you are proposing a feature:

+
  • Explain in detail how it would work.
  • Keep the scope as narrow as possible, to make it easier to implement.
  • Remember that this is a volunteer-driven project, and that contributions are welcome :)

Getting Started

+

Ready to contribute? Here's how to set up mindcv for local development.

+
  1. Fork the mindcv repo on GitHub.

  2. Clone your fork locally:
+
git clone git@github.com:your_name_here/mindcv.git
+
+

After that, you should add the official repository as the upstream repository:

+
git remote add upstream git@github.com:mindspore-lab/mindcv
+
+
  3. Install your local copy into a conda environment. Assuming you have conda installed, this is how you set up your fork for local development:
+
conda create -n mindcv python=3.8
+conda activate mindcv
+cd mindcv
+pip install -e .
+
+
  4. Create a branch for local development:
+
git checkout -b name-of-your-bugfix-or-feature
+
+

Now you can make your changes locally.

+
  5. When you're done making changes, check that your changes pass the linters and the tests:
+
pre-commit run --show-diff-on-failure --color=always --all-files
+pytest
+
+

If all static linting checks pass, you will get output like:

+

[screenshot: pre-commit checks passed]

+

otherwise, you need to fix the warnings according to the output:

+

[screenshot: pre-commit checks failed]

+

To get pre-commit and pytest, just pip install them into your conda environment.

+
  6. Commit your changes and push your branch to GitHub:
+
git add .
+git commit -m "Your detailed description of your changes."
+git push origin name-of-your-bugfix-or-feature
+
+
  7. Submit a pull request through the GitHub website.
+

Pull Request Guidelines

+

Before you submit a pull request, check that it meets these guidelines:

+
  1. The pull request should include tests.
  2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.md.
  3. The pull request should work for Python 3.7, 3.8 and 3.9, and for PyPy. Check https://github.com/mindspore-lab/mindcv/actions and make sure that the tests pass for all supported Python versions.

Tips

+

You can install the git hook scripts instead of linting with pre-commit run -a manually.

+

Run the following command to set up the git hook scripts:

+
pre-commit install
+
+

Now pre-commit will run automatically on git commit!

+

Releasing

+

A reminder for the maintainers on how to deploy. Make sure all your changes are committed (including an entry in HISTORY.md). Then run:

+
bump2version patch # possible: major / minor / patch
+git push
+git push --tags
+
+

GitHub Action will then deploy to PyPI if tests pass.


Data

+

Auto Augmentation

+ + + +
+ + + +

+mindcv.data.auto_augment.auto_augment_transform(configs, hparams) + +

+ + +
+ +

Create an AutoAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the automatic augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). The first part defines +the AutoAugment policy ('autoaug', 'autoaugr' or '3a': +'autoaug' for the original AutoAugment policy with PosterizeOriginal, +'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation, + '3a' for the AutoAugment only with 3 augmentations.) +There is no order requirement for the remaining config parts.

+
    +
  • mstd: Float standard deviation of applied magnitude noise.
  • +
+

Example: 'autoaug-mstd0.5' will automatically augment using the autoaug strategy and magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams of the automatic augmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
def auto_augment_transform(configs, hparams):
+    """
+    Create a AutoAugment transform
+    Args:
+        configs: A string that defines the automatic augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-"). The first part defines
+            the AutoAugment policy ('autoaug', 'autoaugr' or '3a':
+            'autoaug' for the original AutoAugment policy with PosterizeOriginal,
+            'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation,
+             '3a' for the AutoAugment only with 3 augmentations.)
+            There is no order requirement for the remaining config parts.
+
+            - mstd: Float standard deviation of applied magnitude noise.
+
+            Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy
+            and magnitude_std 0.5.
+        hparams: Other hparams of the automatic augmentation scheme.
+    """
+    config = configs.split("-")
+    policy_name = config[0]
+    config = config[1:]
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param injected via hparams for now
+            hparams.setdefault("magnitude_std", float(val))
+        else:
+            assert False, "Unknown AutoAugment config section"
+    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
+    return AutoAugment(aa_policy)
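A small usage sketch based on the docstring above; passing an empty hparams dict is an assumption that mirrors how the training pipeline fills in defaults.

from mindcv.data.auto_augment import auto_augment_transform

# build the AutoAugment op from a config string; magnitude noise std = 0.5
aa = auto_augment_transform("autoaug-mstd0.5", hparams={})
# `aa` can then be placed in the list of image transforms passed to the dataloader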

+mindcv.data.auto_augment.rand_augment_transform(configs, hparams) + +

+ + +
+ +

Create a RandAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the random augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). +The first part defines the AutoAugment policy ('randaug' policy). +There is no order requirement for the remaining config parts.

+
    +
  • m: Integer magnitude of rand augment. Default: 10
  • +
  • n: Integer num layer (number of transform operations selected for each image). Default: 2
  • +
  • w: Integer probability weight index (the index that affects a group of weights selected by operations).
  • +
  • mstd: Floating standard deviation of applied magnitude noise, + or uniform sampling at infinity (or greater than 100).
  • +
  • mmax: Set the upper range limit for magnitude to a value + other than the default value of _LEVEL_DENOM (10).
  • +
  • inc: Integer (bool), using the severity increase with magnitude (default: 0).
  • +
+

Example: 'randaug-w0-n3-mstd0.5' will be random augment + using the weights 0, num_layers 3, magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams (kwargs) for the RandAugmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
def rand_augment_transform(configs, hparams):
+    """
+    Create a RandAugment transform
+    Args:
+        configs: A string that defines the random augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment policy ('randaug' policy).
+            There is no order requirement for the remaining config parts.
+
+            - m: Integer magnitude of rand augment. Default: 10
+            - n: Integer num layer (number of transform operations selected for each image). Default: 2
+            - w: Integer probability weight index (the index that affects a group of weights selected by operations).
+            - mstd: Floating standard deviation of applied magnitude noise,
+                or uniform sampling at infinity (or greater than 100).
+            - mmax: Set the upper range limit for magnitude to a value
+                other than the default value of _LEVEL_DENOM (10).
+            - inc: Integer (bool), using the severity increase with magnitude (default: 0).
+
+            Example: 'randaug-w0-n3-mstd0.5' will be random augment
+                using the weights 0, num_layers 3, magnitude_std 0.5.
+        hparams: Other hparams (kwargs) for the RandAugmentation scheme.
+    """
+    magnitude = _LEVEL_DENOM  # default to _LEVEL_DENOM for magnitude (currently 10)
+    num_layers = 2  # default to 2 ops per image
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    weight_idx = None  # default to no probability weights for op choice
+    transforms = _RAND_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "randaug"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param / randomization of magnitude values
+            mstd = float(val)
+            if mstd > 100:
+                # use uniform sampling in 0 to magnitude if mstd is > 100
+                mstd = float("inf")
+            hparams.setdefault("magnitude_std", mstd)
+        elif key == "mmax":
+            # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]
+            hparams.setdefault("magnitude_max", int(val))
+        elif key == "inc":
+            if bool(val):
+                transforms = _RAND_INCREASING_TRANSFORMS
+        elif key == "m":
+            magnitude = int(val)
+        elif key == "n":
+            num_layers = int(val)
+        elif key == "w":
+            weight_idx = int(val)
+        else:
+            assert False, "Unknown RandAugment config section"
+    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
+    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
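A usage sketch mirroring the config-string format described above (the empty hparams dict is again an assumption):

from mindcv.data.auto_augment import rand_augment_transform

# RandAugment with magnitude 9 and magnitude noise std 0.5 (2 ops per image by default)
ra = rand_augment_transform("randaug-m9-mstd0.5", hparams={})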

+mindcv.data.auto_augment.trivial_augment_wide_transform(configs, hparams) + +

+ + +
+ +

Create a TrivialAugmentWide transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the TrivialAugmentWide configuration. It is composed of multiple parts separated by dashes ("-"). The first part defines the AutoAugment name; it should be 'trivialaugwide'. The second part (optional) sets the maximum value of the magnitude.

  • m: the final magnitude of an operation will be uniformly sampled from [0, m]. Default: 31

Example: 'trivialaugwide-m20' applies TrivialAugmentWide with magnitude uniformly sampled from [0, 20].

+
+

+

+
hparams +
+

Other hparams (kwargs) for the TrivialAugment scheme.

+
+

+

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
def trivial_augment_wide_transform(configs, hparams):
+    """
+    Create a TrivialAugmentWide transform
+    Args:
+        configs: A string that defines the TrivialAugmentWide configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment name, it should be 'trivialaugwide'.
+            the second part(not necessary) the maximum value of magnitude.
+
+            - m: final magnitude of a operation will uniform sampling from [0, m] . Default: 31
+
+            Example: 'trivialaugwide-m20' will be TrivialAugmentWide
+            with mgnitude uniform sampling from [0, 20],
+        hparams: Other hparams (kwargs) for the TrivialAugment scheme.
+    Returns:
+        A Mindspore compatible Transform
+    """
+    magnitude = 31
+    transforms = _TRIVIALAUGMENT_WIDE_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "trivialaugwide"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        else:
+            assert False, "Unknown TrivialAugmentWide config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_max"] = magnitude
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling
+    hparams["trivialaugwide"] = True
+    ta_ops = trivial_augment_wide_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    return TrivialAugmentWide(ta_ops)
+
+
+
+ +
+ + +
+ + + +

+mindcv.data.auto_augment.augment_and_mix_transform(configs, hparams=None) + +

+ + +
+ +

Create an AugMix transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

String defining the configuration of AugMix augmentation. Consists of multiple sections separated by dashes ('-'). The first section defines the specific name of the augment; it should be 'augmix'. The remaining sections, which are not order specific, determine: 'm' - integer magnitude (severity) of the augmentation mix (default: 3); 'w' - integer width of the augmentation chain (default: 3); 'd' - integer depth of the augmentation chain (-1 is random [1, 3], default: -1); 'a' - integer or float, the alpha of the beta distribution used to generate the mixing weights (default: 1.0). E.g., 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2.

+
+

+ + TYPE: + str + +

+
hparams +
+

Other hparams (kwargs) for the Augmentation transforms

+
+

+ + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
def augment_and_mix_transform(configs, hparams=None):
+    """Create AugMix PyTorch transform
+
+    Args:
+        configs (str): String defining configuration of AugMix augmentation. Consists of multiple sections separated
+            by dashes ('-'). The first section defines the specific name of augment, it should be 'augmix'.
+            The remaining sections, not order sepecific determine
+                'm' - integer magnitude (severity) of augmentation mix (default: 3)
+                'w' - integer width of augmentation chain (default: 3)
+                'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
+                'a' - integer or float, the args of beta deviation of beta for generate the weight, default 1..
+            Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2
+
+        hparams: Other hparams (kwargs) for the Augmentation transforms
+
+    Returns:
+         A Mindspore compatible Transform
+    """
+    magnitude = 3
+    width = 3
+    depth = -1
+    alpha = 1.0
+    config = configs.split("-")
+    assert config[0] == "augmix"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        elif key == "w":
+            width = int(val)
+        elif key == "d":
+            depth = int(val)
+        elif key == "a":
+            alpha = float(val)
+        else:
+            assert False, "Unknown AugMix config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling (if not set via mstd arg)
+    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
+    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth)
+
+
+
+ +

Dataset Factory

+ + + +
+ + + +

+mindcv.data.dataset_factory.create_dataset(name='', root=None, split='train', shuffle=True, num_samples=None, num_shards=None, shard_id=None, num_parallel_workers=None, download=False, num_aug_repeats=0, **kwargs) + +

+ + +
+ +

Creates dataset by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

dataset name like MNIST, CIFAR10, ImageNet, or ''. '' means a customized dataset. Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
root +
+

dataset root dir. Default: None.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
split +
+

data split: '' or split name string (train/val/test), if it is '', no split is used. +Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'train' + +

+
shuffle +
+

whether to shuffle the dataset. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
num_samples +
+

Number of elements to sample (default=None, which means sample all elements).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_shards +
+

Number of shards that the dataset will be divided into (default=None). +When this argument is specified, num_samples reflects the maximum sample number of per shard.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
shard_id +
+

The shard ID within num_shards (default=None). +This argument can only be specified when num_shards is also specified.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers to read the data (default=None, set in the config).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
download +
+

whether to download the dataset. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_aug_repeats +
+

Number of dataset repetition for repeated augmentation. +If 0 or 1, repeated augmentation is disabled. +Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
+ +
+ Note +

For custom datasets and imagenet, the dataset dir should follow a structure like:

.dataset_name/
├── split1/
│   ├── class1/
│   │   ├── 000001.jpg
│   │   ├── 000002.jpg
│   │   └── ....
│   └── class2/
│       ├── 000001.jpg
│       ├── 000002.jpg
│       └── ....
└── split2/
    ├── class1/
    │   ├── 000001.jpg
    │   ├── 000002.jpg
    │   └── ....
    └── class2/
        ├── 000001.jpg
        ├── 000002.jpg
        └── ....

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Dataset object

+
+
+ +
+ Source code in mindcv/data/dataset_factory.py +
def create_dataset(
+    name: str = "",
+    root: Optional[str] = None,
+    split: str = "train",
+    shuffle: bool = True,
+    num_samples: Optional[int] = None,
+    num_shards: Optional[int] = None,
+    shard_id: Optional[int] = None,
+    num_parallel_workers: Optional[int] = None,
+    download: bool = False,
+    num_aug_repeats: int = 0,
+    **kwargs,
+):
+    r"""Creates dataset by name.
+
+    Args:
+        name: dataset name like MNIST, CIFAR10, ImageNeT, ''. '' means a customized dataset. Default: ''.
+        root: dataset root dir. Default: None.
+        split: data split: '' or split name string (train/val/test), if it is '', no split is used.
+            Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.
+        shuffle: whether to shuffle the dataset. Default: True.
+        num_samples: Number of elements to sample (default=None, which means sample all elements).
+        num_shards: Number of shards that the dataset will be divided into (default=None).
+            When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+        shard_id: The shard ID within `num_shards` (default=None).
+            This argument can only be specified when `num_shards` is also specified.
+        num_parallel_workers: Number of workers to read the data (default=None, set in the config).
+        download: whether to download the dataset. Default: False
+        num_aug_repeats: Number of dataset repetition for repeated augmentation.
+            If 0 or 1, repeated augmentation is disabled.
+            Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)
+
+    Note:
+        For custom datasets and imagenet, the dataset dir should follow the structure like:
+        .dataset_name/
+        ├── split1/
+        │  ├── class1/
+        │  │   ├── 000001.jpg
+        │  │   ├── 000002.jpg
+        │  │   └── ....
+        │  └── class2/
+        │      ├── 000001.jpg
+        │      ├── 000002.jpg
+        │      └── ....
+        └── split2/
+           ├── class1/
+           │   ├── 000001.jpg
+           │   ├── 000002.jpg
+           │   └── ....
+           └── class2/
+               ├── 000001.jpg
+               ├── 000002.jpg
+               └── ....
+
+    Returns:
+        Dataset object
+    """
+    name = name.lower()
+    if root is None:
+        root = os.path.join(get_dataset_download_root(), name)
+
+    assert (num_samples is None) or (num_aug_repeats == 0), "num_samples and num_aug_repeats can NOT be set together."
+
+    # subset sampling
+    if num_samples is not None and num_samples > 0:
+        # TODO: rewrite ordered distributed sampler (subset sampling in distributed mode is not tested)
+        if num_shards is not None and num_shards > 1:  # distributed
+            _logger.info(f"number of shards: {num_shards}, number of samples: {num_samples}")
+            sampler = DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
+        else:  # standalone
+            if shuffle:
+                sampler = ds.RandomSampler(replacement=False, num_samples=num_samples)
+            else:
+                sampler = ds.SequentialSampler(num_samples=num_samples)
+        mindspore_kwargs = dict(
+            shuffle=None,
+            sampler=sampler,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+    else:
+        sampler = None
+        mindspore_kwargs = dict(
+            shuffle=shuffle,
+            sampler=sampler,
+            num_shards=num_shards,
+            shard_id=shard_id,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+
+    # sampler for repeated augmentation
+    if num_aug_repeats > 0:
+        dataset_size = get_dataset_size(name, root, split)
+        _logger.info(
+            f"Repeated augmentation is enabled, num_aug_repeats: {num_aug_repeats}, "
+            f"original dataset size: {dataset_size}."
+        )
+        # since drop_remainder is usually True, we don't need to do rounding in sampling
+        sampler = RepeatAugSampler(
+            dataset_size,
+            num_shards=num_shards,
+            rank_id=shard_id,
+            num_repeats=num_aug_repeats,
+            selected_round=0,
+            shuffle=shuffle,
+        )
+        mindspore_kwargs = dict(shuffle=None, sampler=sampler, num_shards=None, shard_id=None, **kwargs)
+
+    # create dataset
+    if name in _MINDSPORE_BASIC_DATASET:
+        dataset_class = _MINDSPORE_BASIC_DATASET[name][0]
+        dataset_download = _MINDSPORE_BASIC_DATASET[name][1]
+        dataset_new_path = None
+        if download:
+            if shard_id is not None:
+                root = os.path.join(root, f"dataset_{str(shard_id)}")
+            dataset_download = dataset_download(root)
+            dataset_download.download()
+            dataset_new_path = dataset_download.path
+
+        dataset = dataset_class(
+            dataset_dir=dataset_new_path if dataset_new_path else root,
+            usage=split,
+            **mindspore_kwargs,
+        )
+        # address ms dataset num_classes empty issue
+        if name == "mnist":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar10":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar100":
+            dataset.num_classes = lambda: 100
+
+    else:
+        if name == "imagenet" and download:
+            raise ValueError(
+                "Imagenet dataset download is not supported. "
+                "Please download imagenet from https://www.image-net.org/download.php, "
+                "and parse the path of dateset directory via args.data_dir."
+            )
+
+        if os.path.isdir(root):
+            root = os.path.join(root, split)
+        dataset = ImageFolderDataset(dataset_dir=root, **mindspore_kwargs)
+        """ Another implementation which a bit slower than ImageFolderDataset
+            imagenet_dataset = ImageNetDataset(dataset_dir=root)
+            sampler = RepeatAugSampler(len(imagenet_dataset), num_shards=num_shards, rank_id=shard_id,
+                                       num_repeats=repeated_aug, selected_round=1, shuffle=shuffle)
+            dataset = ds.GeneratorDataset(imagenet_dataset, column_names=imagenet_dataset.column_names, sampler=sampler)
+        """
+    return dataset
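A usage sketch based on the signature documented above; the root path is a placeholder, and download=True only applies to the small built-in datasets such as MNIST/CIFAR-10 (ImageNet must be downloaded manually, as the source shows).

from mindcv.data.dataset_factory import create_dataset

# create the CIFAR-10 training split, downloading it to the placeholder path if needed
dataset = create_dataset(name="cifar10", root="./data/cifar10", split="train",
                         shuffle=True, download=True)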

Sampler

+ + +
+ + + +

+ mindcv.data.distributed_sampler.RepeatAugSampler + + +

+ + +
+ + +

Sampler that restricts data loading to a subset of the dataset for distributed training, with repeated augmentation. It ensures that each augmented version of a sample will be visible to a different process.

+

This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_size +
+

dataset size.

+
+

+

+
num_shards +
+

num devices.

+
+

+ + DEFAULT: + None + +

+
rank_id +
+

device id.

+
+

+ + DEFAULT: + None + +

+
shuffle(bool) +
+

True for using shuffle, False for not using.

+
+

+

+
num_repeats(int) +
+

num of repeated instances in repeated augmentation, Default:3.

+
+

+

+
selected_round(int) +
+

round the total number of samples by this factor. Default: 256.

+
+

+

+
+ +
+ Source code in mindcv/data/distributed_sampler.py +
class RepeatAugSampler:
+    """Sampler that restricts data loading to a subset of the dataset for distributed,
+    with repeated augmentation.
+    It ensures that different each augmented version of a sample will be visible to a
+    different process.
+
+    This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py
+
+    Args:
+        dataset_size: dataset size.
+        num_shards: num devices.
+        rank_id: device id.
+        shuffle(bool): True for using shuffle, False for not using.
+        num_repeats(int): num of repeated instances in repeated augmentation, Default:3.
+        selected_round(int): round the total num of samples by this factor, Defailt:256.
+    """
+
+    def __init__(
+        self,
+        dataset_size,
+        num_shards=None,
+        rank_id=None,
+        shuffle=True,
+        num_repeats=3,
+        selected_round=256,
+    ):
+        if num_shards is None:
+            _logger.warning("num_shards is set to 1 in RepeatAugSampler since it is not passed in")
+            num_shards = 1
+        if rank_id is None:
+            rank_id = 0
+
+        # assert isinstance(num_repeats, int), f'num_repeats should be Type integer, but got {type(num_repeats)}'
+
+        self.dataset_size = dataset_size
+        self.num_shards = num_shards
+        self.rank_id = rank_id
+        self.shuffle = shuffle
+        self.num_repeats = int(num_repeats)
+        self.epoch = 0
+        self.num_samples = int(math.ceil(self.dataset_size * num_repeats / self.num_shards))
+        self.total_size = self.num_samples * self.num_shards
+        # Determine the number of samples to select per epoch for each rank.
+        if selected_round:
+            self.num_selected_samples = int(
+                math.floor(self.dataset_size // selected_round * selected_round / num_shards)
+            )
+        else:
+            self.num_selected_samples = int(math.ceil(self.dataset_size / num_shards))
+
+    def __iter__(self):
+        # deterministically shuffle based on epoch
+        # print('__iter__  generating new shuffled indices: ', self.epoch)
+        if self.shuffle:
+            indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size)
+            indices = indices.tolist()
+            self.epoch += 1
+            # print(indices[:30])
+        else:
+            indices = list(range(self.dataset_size))
+        # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]
+        indices = [ele for ele in indices for i in range(self.num_repeats)]
+
+        # add extra samples to make it evenly divisible
+        padding_size = self.total_size - len(indices)
+        if padding_size > 0:
+            indices += indices[:padding_size]
+        assert len(indices) == self.total_size
+
+        # subsample per rank
+        indices = indices[self.rank_id : self.total_size : self.num_shards]
+        assert len(indices) == self.num_samples
+
+        # return up to num selected samples
+        return iter(indices[: self.num_selected_samples])
+
+    def __len__(self):
+        return self.num_selected_samples
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
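A usage sketch based on the documented arguments; the dataset size and shard layout below are illustrative numbers only.

from mindcv.data.distributed_sampler import RepeatAugSampler

# 3 repeats of each sample, sharded over 8 devices; this rank reads shard 0
sampler = RepeatAugSampler(dataset_size=50000, num_shards=8, rank_id=0,
                           shuffle=True, num_repeats=3)
print(len(sampler))            # number of samples this rank will draw per epoch
indices = list(iter(sampler))  # per-rank indices; each original index is repeated across ranks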

DataLoader

+ + + +
+ + + +

+mindcv.data.loader.create_loader(dataset, batch_size, drop_remainder=False, is_training=False, mixup=0.0, cutmix=0.0, cutmix_prob=0.0, num_classes=1000, transform=None, target_transform=None, num_parallel_workers=None, python_multiprocessing=False, separate=False) + +

+ + +
+ +

Creates dataloader.

+

Applies operations such as transform and batch to the ms.dataset.Dataset object +created by the create_dataset function to get the dataloader.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset +
+

dataset object created by create_dataset.

+
+

+ + TYPE: + ms.dataset.Dataset + +

+
batch_size +
+

The number of rows each batch is created with. An +int or callable object which takes exactly 1 parameter, BatchInfo.

+
+

+ + TYPE: + int or function + +

+
drop_remainder +
+

Determines whether to drop the last block +whose data row number is less than batch size (default=False). If True, and if there are less +than batch_size rows available to make the last batch, then those rows will +be dropped and not propagated to the child node.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
is_training +
+

whether it is in train mode. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
mixup +
+

mixup alpha, mixup will be enabled if > 0. (default=0.0).

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix +
+

cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_prob +
+

prob of doing cutmix for an image (default=0.0)

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
num_classes +
+

the number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
transform +
+

the list of transformations that will be applied to the image, which is obtained by create_transform. If None, the default ImageNet transformation for evaluation will be applied. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
target_transform +
+

the list of transformations that will be applied on the label. +If None, the label will be converted to the type of ms.int32. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers(threads) to process the dataset in parallel +(default=None).

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
python_multiprocessing +
+

Parallelize Python operations with multiple worker processes. This +option could be beneficial if the Python operation is computational heavy (default=False).

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate(bool, +
+

whether to return both the original image and the transformed image separately

+
+

+ + TYPE: + optional + +

+
+ +
+ Note +
    +
  1. cutmix is currently experimental (which means the performance gain is not guaranteed) and cannot be used together with mixup due to the label int type conflict.
  2. is_training, mixup and num_classes are used for MixUp, which is a kind of transform operation. However, we are not able to merge it into transform, due to the limitations of the mindspore.dataset API.
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

BatchDataset, dataset batched.

+
+
+ +
+ Source code in mindcv/data/loader.py +
def create_loader(
+    dataset,
+    batch_size,
+    drop_remainder=False,
+    is_training=False,
+    mixup=0.0,
+    cutmix=0.0,
+    cutmix_prob=0.0,
+    num_classes=1000,
+    transform=None,
+    target_transform=None,
+    num_parallel_workers=None,
+    python_multiprocessing=False,
+    separate=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (ms.dataset.Dataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        is_training (bool): whether it is in train mode. Default: False.
+        mixup (float): mixup alpha, mixup will be enabled if > 0. (default=0.0).
+        cutmix (float): cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.
+        cutmix_prob (float): prob of doing cutmix for an image (default=0.0)
+        num_classes (int): the number of classes. Default: 1000.
+        transform (list or None): the list of transformations that will be applied to the image,
+            which is obtained by `create_transforms`. If None, the default imagenet transformation
+            for evaluation will be applied. Default: None.
+        target_transform (list or None): the list of transformations that will be applied on the label.
+            If None, the label will be converted to the type of ms.int32. Default: None.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option can be beneficial if the Python operation is computationally heavy (default=False).
+        separate (bool, optional): separate the original image and the transformed image (default=False).
+
+    Note:
+        1. cutmix is currently experimental (the performance gain is not guaranteed)
+            and cannot be used together with mixup due to the label int-type conflict.
+        2. `is_training`, `mixup` and `num_classes` are used for Mixup, which is a kind of transform operation.
+            However, it cannot be merged into `transform` due to limitations of the `mindspore.dataset` API.
+
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+
+    if target_transform is None:
+        target_transform = transforms.TypeCast(ms.int32)
+    target_input_columns = "label" if "label" in dataset.get_col_names() else "fine_label"
+    dataset = dataset.map(
+        operations=target_transform,
+        input_columns=target_input_columns,
+        num_parallel_workers=num_parallel_workers,
+        python_multiprocessing=python_multiprocessing,
+    )
+
+    if transform is None:
+        warnings.warn(
+            "Using None as the default value of transform will set it back to "
+            "traditional image transform, which is not recommended. "
+            "You should explicitly call `create_transforms` and pass it to `create_loader`."
+        )
+        transform = create_transforms("imagenet", is_training=False)
+
+    # only apply augment splits to train dataset
+    if separate and is_training:
+        assert isinstance(transform, tuple) and len(transform) == 3
+
+        # Note: mindspore-2.0 delete the parameter column_order
+        sig = inspect.signature(dataset.map)
+        pass_column_order = False if "kwargs" in sig.parameters else True
+
+        # map all the transform
+        dataset = map_transform_splits(
+            dataset, transform, num_parallel_workers, python_multiprocessing, pass_column_order
+        )
+        # after batch, the dataset has 4 columns
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+        # concat the 3 columns of image
+        dataset = dataset.map(
+            operations=concat_per_batch_map,
+            input_columns=["image_clean", "image_aug1", "image_aug2", "label"],
+            output_columns=["image", "label"],
+            column_order=["image", "label"] if pass_column_order else None,
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+    else:
+        dataset = dataset.map(
+            operations=transform,
+            input_columns="image",
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+
+    if is_training:
+        if (mixup + cutmix > 0.0) and batch_size > 1:
+            # TODO: use mindspore vision cutmix and mixup after the conflict is fixed in a later release
+            # set label_smoothing 0 here since label smoothing is computed in loss module
+            mixup_fn = Mixup(
+                mixup_alpha=mixup,
+                cutmix_alpha=cutmix,
+                cutmix_minmax=None,
+                prob=cutmix_prob,
+                switch_prob=0.5,
+                label_smoothing=0.0,
+                num_classes=num_classes,
+            )
+            # images in a batch are mixed. labels are converted to soft one-hot labels.
+            dataset = dataset.map(
+                operations=mixup_fn,
+                input_columns=["image", target_input_columns],
+                num_parallel_workers=num_parallel_workers,
+            )
+
+    return dataset
+
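A minimal end-to-end sketch of how create_loader is typically combined with create_dataset and create_transforms; the dataset root path and the hyper-parameter values below are placeholder assumptions, not taken from this page:

from mindcv.data import create_dataset, create_transforms, create_loader

# assumed ImageNet-style folder dataset at a placeholder path
dataset = create_dataset(name="imagenet", root="./data/imagenet", split="train", shuffle=True)
transform = create_transforms(dataset_name="imagenet", image_resize=224, is_training=True)
loader = create_loader(
    dataset=dataset,
    batch_size=64,
    is_training=True,
    mixup=0.2,                  # soft labels of shape (batch, num_classes) are produced when mixup > 0
    num_classes=1000,
    transform=transform,
    num_parallel_workers=8,
)
for images, labels in loader.create_tuple_iterator():
    break  # images: (64, 3, 224, 224) float32, labels: (64, 1000) float32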
+
+
+ +

MixUp

+ + +
+ + + +

+ mindcv.data.mixup.Mixup + + +

+ + +
+ + +

Mixup/Cutmix that applies different params to each element or whole batch

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
mixup_alpha +
+

mixup alpha value, mixup is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
cutmix_alpha +
+

cutmix alpha value, cutmix is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_minmax +
+

cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.

+
+

+ + TYPE: + List[float] + + + DEFAULT: + None + +

+
prob +
+

probability of applying mixup or cutmix per batch or element

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
switch_prob +
+

probability of switching to cutmix instead of mixup when both are active

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
mode +
+

how to apply mixup/cutmix params: per 'batch', 'pair' (pair of elements), or 'elem' (element)

+
+

+ + TYPE: + str + + + DEFAULT: + 'batch' + +

+
correct_lam +
+

apply lambda correction when the cutmix bbox is clipped by image borders

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
label_smoothing +
+

apply label smoothing to the mixed target tensor

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
num_classes +
+

number of classes for target

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/data/mixup.py +
class Mixup:
+    """Mixup/Cutmix that applies different params to each element or whole batch
+
+    Args:
+        mixup_alpha (float): mixup alpha value, mixup is active if > 0.
+        cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0.
+        cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.
+        prob (float): probability of applying mixup or cutmix per batch or element
+        switch_prob (float): probability of switching to cutmix instead of mixup when both are active
+        mode (str): how to apply mixup/cutmix params: per 'batch', 'pair' (pair of elements), or 'elem' (element)
+        correct_lam (bool): apply lambda correction when the cutmix bbox is clipped by image borders
+        label_smoothing (float): apply label smoothing to the mixed target tensor
+        num_classes (int): number of classes for target
+    """
+
+    def __init__(
+        self,
+        mixup_alpha=1.0,
+        cutmix_alpha=0.0,
+        cutmix_minmax=None,
+        prob=1.0,
+        switch_prob=0.5,
+        mode="batch",
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000,
+    ):
+        self.mixup_alpha = mixup_alpha
+        self.cutmix_alpha = cutmix_alpha
+        self.cutmix_minmax = cutmix_minmax
+        if self.cutmix_minmax is not None:
+            assert len(self.cutmix_minmax) == 2
+            # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe
+            self.cutmix_alpha = 1.0
+        self.mix_prob = prob
+        self.switch_prob = switch_prob
+        self.label_smoothing = label_smoothing
+        self.num_classes = num_classes
+        self.mode = mode
+        self.correct_lam = correct_lam  # correct lambda based on clipped area for cutmix
+        self.mixup_enabled = True  # set False to disable mixing (intended to be set by the train loop)
+
+    def _params_per_elem(self, batch_size):
+        """_params_per_elem"""
+        lam = np.ones(batch_size, dtype=np.float32)
+        use_cutmix = np.zeros(batch_size, dtype=bool)  # np.bool is removed in NumPy >= 1.24; use the builtin bool
+        if self.mixup_enabled:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand(batch_size) < self.switch_prob
+                lam_mix = np.where(
+                    use_cutmix,
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size),
+                    np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size),
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = np.ones(batch_size, dtype=bool)
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam)
+        return lam, use_cutmix
+
+    def _params_per_batch(self):
+        """_params_per_batch"""
+        lam = 1.0
+        use_cutmix = False
+        if self.mixup_enabled and np.random.rand() < self.mix_prob:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand() < self.switch_prob
+                lam_mix = (
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+                    if use_cutmix
+                    else np.random.beta(self.mixup_alpha, self.mixup_alpha)
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = True
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = float(lam_mix)
+        return lam, use_cutmix
+
+    def _mix_elem(self, x):
+        """_mix_elem"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_pair(self, x):
+        """_mix_pair"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size // 2)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size // 2):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+                    x[j] = x[j] * lam + x_orig[i] * (1 - lam)
+        lam_batch = np.concatenate((lam_batch, lam_batch[::-1]))
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_batch(self, x):
+        """_mix_batch"""
+        lam, use_cutmix = self._params_per_batch()
+        if lam == 1.0:
+            return 1.0
+        if use_cutmix:
+            (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+            )
+            x[:, :, yl:yh, xl:xh] = np.flip(x, axis=0)[:, :, yl:yh, xl:xh]
+        else:
+            x_flipped = np.flip(x, axis=0) * (1.0 - lam)
+            x *= lam
+            x += x_flipped
+        return lam
+
+    def __call__(self, x, target):
+        """Mixup apply"""
+        # the same to image, label
+        assert len(x) % 2 == 0, "Batch size should be even when using this"
+        if self.mode == "elem":
+            lam = self._mix_elem(x)
+        elif self.mode == "pair":
+            lam = self._mix_pair(x)
+        else:
+            lam = self._mix_batch(x)
+        target = mixup_target(target, self.num_classes, lam, self.label_smoothing)
+        return x.astype(np.float32), target.astype(np.float32)
+
+
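A small sketch of calling Mixup directly on a numpy batch, mirroring how create_loader applies it through dataset.map; the shapes and alpha values are illustrative assumptions:

import numpy as np
from mindcv.data.mixup import Mixup

mixup_fn = Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0, switch_prob=0.5, num_classes=10)
images = np.random.rand(8, 3, 32, 32).astype(np.float32)  # batch size must be even
labels = np.random.randint(0, 10, size=(8,))               # sparse integer labels
mixed_images, soft_labels = mixup_fn(images, labels)       # soft_labels: (8, 10) float32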
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Transform Factory

+ + + +
+ + + +

+mindcv.data.transforms_factory.create_transforms(dataset_name='', image_resize=224, is_training=False, auto_augment=None, separate=False, **kwargs) + +

+ + +
+ +

Creates a list of transform operation on image data.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_name +
+

if '', the dataset is treated as a customized dataset and the same transform pipeline as ImageNet is applied. +If a standard dataset name (imagenet, cifar10, mnist) is given, the preset transforms for that dataset are returned. +Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
image_resize +
+

the image size after resize for adapting to network. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
is_training +
+

if True, augmentation will be applied if supported. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate +
+

separate the original image and the transformed image.

+
+

+ + DEFAULT: + False + +

+
**kwargs +
+

additional args passed to transforms_imagenet_train and transforms_imagenet_eval

+
+

+ + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A list of transformation operations

+
+
+ +
+ Source code in mindcv/data/transforms_factory.py +
def create_transforms(
+    dataset_name="",
+    image_resize=224,
+    is_training=False,
+    auto_augment=None,
+    separate=False,
+    **kwargs,
+):
+    r"""Creates a list of transform operation on image data.
+
+    Args:
+        dataset_name (str): if '', the dataset is treated as a customized dataset and the same transform
+            pipeline as ImageNet is applied. If a standard dataset name (imagenet, cifar10, mnist) is given,
+            the preset transforms for that dataset are returned. Default: ''.
+        image_resize (int): the image size after resize for adapting to network. Default: 224.
+        is_training (bool): if True, augmentation will be applied if supported. Default: False.
+        auto_augment (str): augmentation strategies, such as "augmix", "autoaug", etc.
+        separate: separate the original image and the transformed image.
+        **kwargs: additional args passed to `transforms_imagenet_train` and `transforms_imagenet_eval`
+
+    Returns:
+        A list of transformation operations
+    """
+
+    dataset_name = dataset_name.lower()
+
+    if dataset_name in ("imagenet", ""):
+        trans_args = dict(image_resize=image_resize, **kwargs)
+        if is_training:
+            return transforms_imagenet_train(auto_augment=auto_augment, separate=separate, **trans_args)
+
+        return transforms_imagenet_eval(**trans_args)
+    elif dataset_name in ("cifar10", "cifar100"):
+        trans_list = transforms_cifar(resize=image_resize, is_training=is_training)
+        return trans_list
+    elif dataset_name == "mnist":
+        trans_list = transforms_mnist(resize=image_resize)
+        return trans_list
+    else:
+        raise NotImplementedError(
+            f"Only supports creating transforms for ['imagenet'] datasets, but got {dataset_name}."
+        )
+
+
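A short sketch of building separate training and evaluation pipelines; the auto_augment policy name follows the "autoaug" option mentioned in the docstring:

from mindcv.data import create_transforms

train_trans = create_transforms(
    dataset_name="imagenet",
    image_resize=224,
    is_training=True,
    auto_augment="autoaug",  # see transforms_imagenet_train for the supported policy strings
)
eval_trans = create_transforms(dataset_name="imagenet", image_resize=224, is_training=False)
cifar_trans = create_transforms(dataset_name="cifar10", image_resize=32, is_training=True)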
+
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/reference/loss/index.html b/reference/loss/index.html new file mode 100644 index 000000000..16a0af2e1 --- /dev/null +++ b/reference/loss/index.html @@ -0,0 +1,1994 @@ + + + + + + + + + + + + + + + + + + + + + + + + loss - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Loss

+

Loss Factory

+ + + +
+ + + +

+mindcv.loss.loss_factory.create_loss(name='CE', weight=None, reduction='mean', label_smoothing=0.0, aux_factor=0.0) + +

+ + +
+ +

Creates loss function

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

loss name : 'CE' for cross_entropy. 'BCE': binary cross entropy. Default: 'CE'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'CE' + +

+
weight +
+

Class weight. A rescaling weight given to the loss of each batch element. +If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
reduction +
+

Apply a specific reduction method to the output: 'mean' or 'sum'. +'mean': the sum of the output is divided by the number of elements in the output. +'sum': the output is summed. Default: 'mean'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'mean' + +

+
label_smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
+ +
+ Inputs +
    +
  • logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is the number of samples and C is the number of classes. A tuple of two logits is supported, in the order (main_logits, aux_logits), for the auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
  • labels (Tensor): Ground truth labels. Shape: [N] or [N, C]. (1) If in shape [N], sparse labels representing the class indices; must be int type. (2) If in shape [N, C], dense labels representing the ground-truth class probability values or one-hot labels; must be float type. If the loss type is BCE, the shape of labels must be [N, C].
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Loss function to compute the loss between the input logits and labels.

+
+
+ +
+ Source code in mindcv/loss/loss_factory.py +
def create_loss(
+    name: str = "CE",
+    weight: Optional[Tensor] = None,
+    reduction: str = "mean",
+    label_smoothing: float = 0.0,
+    aux_factor: float = 0.0,
+):
+    r"""Creates loss function
+
+    Args:
+        name (str):  loss name : 'CE' for cross_entropy. 'BCE': binary cross entropy. Default: 'CE'.
+        weight (Tensor): Class weight. A rescaling weight given to the loss of each batch element.
+            If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.
+        reduction: Apply a specific reduction method to the output: 'mean' or 'sum'.
+            'mean': the sum of the output is divided by the number of elements in the output.
+            'sum': the output is summed. Default: 'mean'.
+        label_smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor (float): Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3. Default: 0.0.
+
+    Inputs:
+        - logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples,
+            C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits)
+            for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
+        - labels (Tensor): Ground truth labels. Shape: [N] or [N, C].
+            (1) If in shape [N], sparse labels representing the class indices. Must be int type.
+            (2) shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
+
+    Returns:
+       Loss function to compute the loss between the input logits and labels.
+    """
+    name = name.lower()
+
+    if name == "ce":
+        loss = CrossEntropySmooth(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    elif name == "bce":
+        loss = BinaryCrossEntropySmooth(
+            smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight, pos_weight=None
+        )
+    elif name == "asl_single_label":
+        loss = AsymmetricLossSingleLabel(smoothing=label_smoothing)
+    elif name == "asl_multi_label":
+        loss = AsymmetricLossMultilabel()
+    elif name == "jsd":
+        loss = JSDCrossEntropy(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    else:
+        raise NotImplementedError
+
+    return loss
+
+
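A minimal sketch of creating and calling the loss; it assumes create_loss is re-exported from mindcv.loss (otherwise import it from mindcv.loss.loss_factory):

import numpy as np
import mindspore as ms
from mindcv.loss import create_loss

criterion = create_loss(name="CE", label_smoothing=0.1, reduction="mean")
logits = ms.Tensor(np.random.randn(4, 1000), ms.float32)
labels = ms.Tensor(np.random.randint(0, 1000, size=(4,)), ms.int32)  # sparse class indices
loss = criterion(logits, labels)  # scalar Tensor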
+
+ +

Cross Entropy

+ + +
+ + + +

+ mindcv.loss.cross_entropy_smooth.CrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Cross entropy loss with label smoothing. +Applies a softmax activation function to the input logits and computes the cross entropy +between the logits and the label.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element. +Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes. + Tuple composed of multiple logits are supported in order (main_logits, aux_logits) + for auxiliary loss used in networks like inception_v3. +labels (Tensor): Ground truth label. Shape: [N] or [N, C]. + (1) Shape (N), sparse labels representing the class indices. Must be int type. + (2) Shape [N, C], dense labels representing the ground truth class probability values, + or the one-hot labels. Must be float type.

+
+
+ Source code in mindcv/loss/cross_entropy_smooth.py +
class CrossEntropySmooth(nn.LossBase):
+    """
+    Cross entropy loss with label smoothing.
+    Applies a softmax activation function to the input `logits` and computes the cross entropy
+    between the logits and the label.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element.
+            Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes.
+            Tuple composed of multiple logits are supported in order (main_logits, aux_logits)
+            for auxiliary loss used in networks like inception_v3.
+        labels (Tensor): Ground truth label. Shape: [N] or [N, C].
+            (1) Shape (N), sparse labels representing the class indices. Must be int type.
+            (2) Shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+            for aux in logits[1:]:
+                if self.aux_factor > 0:
+                    loss_aux += F.cross_entropy(
+                        aux, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+                    )
+        else:
+            main_logits = logits
+
+        loss_logits = F.cross_entropy(
+            main_logits, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+        )
+        loss = loss_logits + self.aux_factor * loss_aux
+        return loss
+
+
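A sketch of the auxiliary-logit case (deep supervision), with toy shapes chosen for illustration:

import numpy as np
import mindspore as ms
from mindcv.loss.cross_entropy_smooth import CrossEntropySmooth

criterion = CrossEntropySmooth(smoothing=0.1, aux_factor=0.4)
main_logits = ms.Tensor(np.random.randn(2, 10), ms.float32)
aux_logits = ms.Tensor(np.random.randn(2, 10), ms.float32)
labels = ms.Tensor(np.array([3, 7]), ms.int32)
loss = criterion((main_logits, aux_logits), labels)  # main loss + 0.4 * auxiliary loss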
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Binary Cross Entropy

+ + +
+ + + +

+ mindcv.loss.binary_cross_entropy_smooth.BinaryCrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Binary cross entropy loss with label smoothing. +Applies a sigmoid activation function to the input logits and computes the binary cross entropy +between the logits and the label.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. A rescaling weight applied to the loss of each batch element. Shape [C]. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
pos_weight +
+

Positive weight for each class. A weight of positive examples. Shape [C]. +Must be a vector with length equal to the number of classes. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes. + Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss. +labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as logits or (2) shape [N]. + can be a class probability matrix or one-hot labels. Data type must be float16 or float32.

+
+
+ Source code in mindcv/loss/binary_cross_entropy_smooth.py +
class BinaryCrossEntropySmooth(nn.LossBase):
+    """
+    Binary cross entropy loss with label smoothing.
+    Applies a sigmoid activation function to the input `logits` and computes the binary cross entropy
+    between the logits and the label.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. A rescaling weight applied to the loss of each batch element. Shape [C].
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+        pos_weight (Tensor): Positive weight for each class. A weight of positive examples. Shape [C].
+            Must be a vector with length equal to the number of classes.
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes.
+            Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss.
+        labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as `logits` or (2) shape [N].
+            can be a class probability matrix or one-hot labels. Data type must be float16 or float32.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None, pos_weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+        self.pos_weight = pos_weight
+        self.ones = P.OnesLike()
+        self.one_hot = P.OneHot()
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+        aux_logits = None
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+        else:
+            main_logits = logits
+
+        if main_logits.size != labels.size:
+            # We must explicitly convert the label to one-hot,
+            # for binary_cross_entropy_with_logits restricting input and label have the same shape.
+            class_dim = 0 if main_logits.ndim == 1 else 1
+            n_classes = main_logits.shape[class_dim]
+            labels = self.one_hot(labels, n_classes, Tensor(1.0), Tensor(0.0))
+
+        ones_input = self.ones(main_logits)
+        if self.weight is not None:
+            weight = self.weight
+        else:
+            weight = ones_input
+        if self.pos_weight is not None:
+            pos_weight = self.pos_weight
+        else:
+            pos_weight = ones_input
+
+        if self.smoothing > 0.0:
+            class_dim = 0 if main_logits.ndim == 1 else -1
+            n_classes = main_logits.shape[class_dim]
+            labels = labels * (1 - self.smoothing) + self.smoothing / n_classes
+
+        if self.aux_factor > 0 and aux_logits is not None:
+            for aux_logits in logits[1:]:
+                loss_aux += F.binary_cross_entropy_with_logits(
+                    aux_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+                )
+        # else:
+        #    warnings.warn("There are logit tuple input, but the auxiliary loss factor is 0.")
+
+        loss_logits = F.binary_cross_entropy_with_logits(
+            main_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+        )
+
+        loss = loss_logits + self.aux_factor * loss_aux
+
+        return loss
+
+
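A sketch with dense [N, C] multi-label targets, the layout this loss expects when no one-hot conversion is needed:

import numpy as np
import mindspore as ms
from mindcv.loss.binary_cross_entropy_smooth import BinaryCrossEntropySmooth

criterion = BinaryCrossEntropySmooth(smoothing=0.0)
logits = ms.Tensor(np.random.randn(4, 5), ms.float32)
labels = ms.Tensor(np.random.randint(0, 2, size=(4, 5)).astype(np.float32))  # dense float targets
loss = criterion(logits, labels)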
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/reference/models.layers/index.html b/reference/models.layers/index.html new file mode 100644 index 000000000..7b7c0436a --- /dev/null +++ b/reference/models.layers/index.html @@ -0,0 +1,2976 @@ + + + + + + + + + + + + + + + + + + + + + + + + models.layers - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Common Layers in Model

+

Activation

+ + +
+ + + +

+ mindcv.models.layers.activation.Swish + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Swish activation function: x * sigmoid(x).

+ +
+ Return +

Tensor

+
+
+ Example +
+
+
+

x = Tensor(((20, 16), (50, 50)), mindspore.float32) +Swish()(x)

+
+
+
+
+
+ Source code in mindcv/models/layers/activation.py +
class Swish(nn.Cell):
+    """
+    Swish activation function: x * sigmoid(x).
+
+    Args:
+        None
+
+    Return:
+        Tensor
+
+    Example:
+        >>> x = Tensor(((20, 16), (50, 50)), mindspore.float32)
+        >>> Swish()(x)
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.result = None
+        self.sigmoid = nn.Sigmoid()
+
+    def construct(self, x):
+        result = x * self.sigmoid(x)
+        return result
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

DropPath

+ + +
+ + + +

+ mindcv.models.layers.drop_path.DropPath + + +

+ + +
+

+ Bases: nn.Cell

+ + +

DropPath (Stochastic Depth) regularization layers

+ +
+ Source code in mindcv/models/layers/drop_path.py +
class DropPath(nn.Cell):
+    """DropPath (Stochastic Depth) regularization layers"""
+
+    def __init__(
+        self,
+        drop_prob: float = 0.0,
+        scale_by_keep: bool = True,
+    ) -> None:
+        super().__init__()
+        self.keep_prob = 1.0 - drop_prob
+        self.scale_by_keep = scale_by_keep
+        self.dropout = Dropout(p=drop_prob)
+
+    def construct(self, x: Tensor) -> Tensor:
+        if self.keep_prob == 1.0 or not self.training:
+            return x
+        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+        random_tensor = self.dropout(ones(shape))
+        if not self.scale_by_keep:
+            random_tensor = ops.mul(random_tensor, self.keep_prob)
+        return x * random_tensor
+
+
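A sketch of the usual pattern: DropPath wraps the residual branch so that, during training, the branch output is dropped per sample. The toy block below is an illustrative assumption, not a layer from the package:

import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindcv.models.layers.drop_path import DropPath

class ToyResidualBlock(nn.Cell):
    def __init__(self, dim: int, drop_prob: float = 0.1):
        super().__init__()
        self.fc = nn.Dense(dim, dim)
        self.drop_path = DropPath(drop_prob)

    def construct(self, x):
        return x + self.drop_path(self.fc(x))  # branch is stochastically skipped in training

block = ToyResidualBlock(dim=64)
out = block(ms.Tensor(np.random.randn(8, 64), ms.float32))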
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Identity

+ + +
+ + + +

+ mindcv.models.layers.identity.Identity + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Identity

+ +
+ Source code in mindcv/models/layers/identity.py +
class Identity(nn.Cell):
+    """Identity"""
+
+    def construct(self, x):
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

MLP

+ + +
+ + + +

+ mindcv.models.layers.mlp.Mlp + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/layers/mlp.py +
class Mlp(nn.Cell):
+    def __init__(
+        self,
+        in_features: int,
+        hidden_features: Optional[int] = None,
+        out_features: Optional[int] = None,
+        act_layer: Optional[nn.Cell] = nn.GELU,
+        drop: float = 0.0,
+    ) -> None:
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Dense(in_channels=in_features, out_channels=hidden_features, has_bias=True)
+        self.act = act_layer()
+        self.fc2 = nn.Dense(in_channels=hidden_features, out_channels=out_features, has_bias=True)
+        self.drop = Dropout(p=drop)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
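A sketch of the typical transformer feed-forward usage, with an assumed 4x hidden expansion:

import numpy as np
import mindspore as ms
from mindcv.models.layers.mlp import Mlp

mlp = Mlp(in_features=192, hidden_features=192 * 4, drop=0.1)
tokens = ms.Tensor(np.random.randn(2, 196, 192), ms.float32)  # (batch, num_patches, dim)
out = mlp(tokens)  # same shape as the input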
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Patch Embedding

+ + +
+ + + +

+ mindcv.models.layers.patch_embed.PatchEmbed + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Image to Patch Embedding

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Image size. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
patch_size +
+

Patch token size. Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
in_chans +
+

Number of input image channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
embed_dim +
+

Number of linear projection output channels. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
norm_layer +
+

Normalization layer. Default: None

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/layers/patch_embed.py +
class PatchEmbed(nn.Cell):
+    """Image to Patch Embedding
+
+    Args:
+        image_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Cell, optional): Normalization layer. Default: None
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        embed_dim: int = 96,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        image_size = to_2tuple(image_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [image_size[0] // patch_size[0], image_size[1] // patch_size[1]]
+        self.image_size = image_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+
+        self.proj = nn.Conv2d(in_channels=in_chans, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size,
+                              pad_mode='pad', has_bias=True, weight_init="TruncatedNormal")
+
+        if norm_layer is not None:
+            if isinstance(embed_dim, int):
+                embed_dim = (embed_dim,)
+            self.norm = norm_layer(embed_dim, epsilon=1e-5)
+        else:
+            self.norm = None
+
+    def construct(self, x: Tensor) -> Tensor:
+        """docstring"""
+        B = x.shape[0]
+        # FIXME look at relaxing size constraints
+        x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+        x = ops.Transpose()(x, (0, 2, 1))
+
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.layers.patch_embed.PatchEmbed.construct(x) + +

+ + +
+ +

Project the image into flattened patch embeddings of shape (B, num_patches, embed_dim).

+ +
+ Source code in mindcv/models/layers/patch_embed.py +
def construct(self, x: Tensor) -> Tensor:
+    """docstring"""
+    B = x.shape[0]
+    # FIXME look at relaxing size constraints
+    x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+    x = ops.Transpose()(x, (0, 2, 1))
+
+    if self.norm is not None:
+        x = self.norm(x)
+    return x
+
+
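A sketch of tokenizing a 224x224 image with the default 4x4 patches:

import numpy as np
import mindspore as ms
from mindcv.models.layers.patch_embed import PatchEmbed

patch_embed = PatchEmbed(image_size=224, patch_size=4, in_chans=3, embed_dim=96)
images = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
tokens = patch_embed(images)  # (1, 3136, 96): 56 * 56 patches, each embedded to dim 96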
+
+ +
+ + + +
+ +
+ +

Pooling

+ + +
+ + + +

+ mindcv.models.layers.pooling.GlobalAvgPooling + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1

+ +
+ Source code in mindcv/models/layers/pooling.py +
class GlobalAvgPooling(nn.Cell):
+    """
+    GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1
+    """
+
+    def __init__(self, keep_dims: bool = False) -> None:
+        super().__init__()
+        self.keep_dims = keep_dims
+
+    def construct(self, x):
+        x = ops.mean(x, axis=(2, 3), keep_dims=self.keep_dims)
+        return x
+
+
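A sketch of collapsing a feature map before a classifier head:

import numpy as np
import mindspore as ms
from mindcv.models.layers.pooling import GlobalAvgPooling

pool = GlobalAvgPooling(keep_dims=False)
feat = ms.Tensor(np.random.randn(2, 512, 7, 7), ms.float32)
pooled = pool(feat)  # (2, 512), averaged over the spatial dimensions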
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Selective Kernel

+ + +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernelAttn + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Attention Module +Selective Kernel attention mechanism factored out into its own module.

+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
class SelectiveKernelAttn(nn.Cell):
+    """Selective Kernel Attention Module
+    Selective Kernel attention mechanism factored out into its own module.
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        num_paths: int = 2,
+        attn_channels: int = 32,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        self.num_paths = num_paths
+        self.mean = GlobalAvgPooling(keep_dims=True)
+        self.fc_reduce = nn.Conv2d(channels, attn_channels, kernel_size=1, has_bias=False)
+        self.bn = norm(attn_channels)
+        self.act = activation()
+        self.fc_select = nn.Conv2d(attn_channels, channels * num_paths, kernel_size=1)
+        self.softmax = nn.Softmax(axis=1)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.mean((x.sum(1)))
+        x = self.fc_reduce(x)
+        x = self.bn(x)
+        x = self.act(x)
+        x = self.fc_select(x)
+        b, c, h, w = x.shape
+        x = x.reshape((b, self.num_paths, c // self.num_paths, h, w))
+        x = self.softmax(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernel + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Convolution Module +As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications. +Largest change is the input split, which divides the input channels across each convolution path, this can +be viewed as a grouping of sorts, but the output channel counts expand to the module level value. This keeps +the parameter count from ballooning when the convolutions themselves don't have groups, but still provides +a noteworthy increase in performance over similar param count models without this attention layer. -Ross W

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

module input (feature) channel count

+
+

+ + TYPE: + int + +

+
out_channels +
+

module output (feature) channel count

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
kernel_size +
+

kernel size for each convolution branch

+
+

+ + TYPE: + (int, list) + + + DEFAULT: + None + +

+
stride +
+

stride for convolutions

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
dilation +
+

dilation for module as a whole, impacts dilation of each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
groups +
+

number of groups for each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
rd_ratio +
+

reduction factor for attention features

+
+

+ + TYPE: + (int, float) + + + DEFAULT: + 1.0 / 16 + +

+
rd_channels(int) +
+

reduction channels can be specified directly by arg (if rd_channels is set)

+
+

+

+
rd_divisor(int) +
+

divisor can be specified to keep channels

+
+

+

+
keep_3x3 +
+

keep all branch convolution kernels as 3x3, changing larger kernels for dilations

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
split_input +
+

split input channels evenly across each convolution branch, keeps param count lower, +can be viewed as grouping by path, output expands to module out_channels count

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
activation +
+

activation layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.ReLU + +

+
norm +
+

batchnorm/norm layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.BatchNorm2d + +

+
+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
class SelectiveKernel(nn.Cell):
+    """Selective Kernel Convolution Module
+    As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications.
+    Largest change is the input split, which divides the input channels across each convolution path, this can
+    be viewed as a grouping of sorts, but the output channel counts expand to the module level value. This keeps
+    the parameter count from ballooning when the convolutions themselves don't have groups, but still provides
+    a noteworthy increase in performance over similar param count models without this attention layer. -Ross W
+    Args:
+        in_channels (int):  module input (feature) channel count
+        out_channels (int):  module output (feature) channel count
+        kernel_size (int, list): kernel size for each convolution branch
+        stride (int): stride for convolutions
+        dilation (int): dilation for module as a whole, impacts dilation of each branch
+        groups (int): number of groups for each branch
+        rd_ratio (int, float): reduction factor for attention features
+        rd_channels(int): reduction channels can be specified directly by arg (if rd_channels is set)
+        rd_divisor(int): divisor can be specified to keep channels
+        keep_3x3 (bool): keep all branch convolution kernels as 3x3, changing larger kernels for dilations
+        split_input (bool): split input channels evenly across each convolution branch, keeps param count lower,
+            can be viewed as grouping by path, output expands to module out_channels count
+        activation (nn.Module): activation layer to use
+        norm (nn.Module): batchnorm/norm layer to use
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: Optional[int] = None,
+        kernel_size: Optional[Union[int, List]] = None,
+        stride: int = 1,
+        dilation: int = 1,
+        groups: int = 1,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        keep_3x3: bool = True,
+        split_input: bool = True,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        out_channels = out_channels or in_channels
+        kernel_size = kernel_size or [3, 5]  # default to one 3x3 and one 5x5 branch. 5x5 -> 3x3 + dilation
+        _kernel_valid(kernel_size)
+        if not isinstance(kernel_size, list):
+            kernel_size = [kernel_size] * 2
+        if keep_3x3:
+            dilation = [dilation * (k - 1) // 2 for k in kernel_size]
+            kernel_size = [3] * len(kernel_size)
+        else:
+            dilation = [dilation] * len(kernel_size)
+        self.num_paths = len(kernel_size)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.split_input = split_input
+        if self.split_input:
+            assert in_channels % self.num_paths == 0
+            in_channels = in_channels // self.num_paths
+        groups = min(out_channels, groups)
+        self.split = Split(split_size_or_sections=self.in_channels // self.num_paths, output_num=self.num_paths, axis=1)
+
+        self.paths = nn.CellList([
+            Conv2dNormActivation(in_channels, out_channels, kernel_size=k, stride=stride, groups=groups,
+                                 dilation=d, activation=activation, norm=norm)
+            for k, d in zip(kernel_size, dilation)
+        ])
+
+        attn_channels = rd_channels or make_divisible(out_channels * rd_ratio, divisor=rd_divisor)
+        self.attn = SelectiveKernelAttn(out_channels, self.num_paths, attn_channels)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_paths = []
+        if self.split_input:
+            x_split = self.split(x)
+            for i, op in enumerate(self.paths):
+                x_paths.append(op(x_split[i]))
+        else:
+            for op in self.paths:
+                x_paths.append(op(x))
+
+        x = ops.stack(x_paths, axis=1)
+        x_attn = self.attn(x)
+        x = x * x_attn
+        x = x.sum(1)
+        return x
+
+
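A sketch of dropping a SelectiveKernel block in place of a plain 3x3 convolution; the channel counts are illustrative and must satisfy in_channels % num_paths == 0 when split_input is True:

import numpy as np
import mindspore as ms
from mindcv.models.layers.selective_kernel import SelectiveKernel

sk = SelectiveKernel(in_channels=64, out_channels=128)  # default branches: a 3x3 and a dilated 3x3
x = ms.Tensor(np.random.randn(1, 64, 56, 56), ms.float32)
y = sk(x)  # (1, 128, 56, 56) with the default stride of 1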
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Squeeze and Excite

+ + +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExcite + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in original SE-Nets with a few additions.

+ +
+ Additions include +
    +
  • divisor can be specified to keep channels % div == 0 (default: 8)
  • reduction channels can be specified directly by arg (if rd_channels is set)
  • reduction channels can be specified by float rd_ratio (default: 1/16)
  • customizable activation, normalization, and gate layer
+
+
+ Source code in mindcv/models/layers/squeeze_excite.py +
class SqueezeExcite(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    Additions include:
+        * divisor can be specified to keep channels % div == 0 (default: 8)
+        * reduction channels can be specified directly by arg (if rd_channels is set)
+        * reduction channels can be specified by float rd_ratio (default: 1/16)
+        * customizable activation, normalization, and gate layer
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Conv2d(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=True)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x = x * x_se
+        return x
+
+
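A sketch of channel attention applied to a convolutional feature map:

import numpy as np
import mindspore as ms
from mindcv.models.layers.squeeze_excite import SqueezeExcite

se = SqueezeExcite(in_channels=256, rd_ratio=1.0 / 16)  # reduction width: 256 / 16, rounded by rd_divisor
x = ms.Tensor(np.random.randn(2, 256, 14, 14), ms.float32)
y = se(x)  # channel-wise re-weighted, same shape as x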
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExciteV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in the original SE-Nets with a few additions. +V1 replaces the fc layers with 1x1 convolutions, while V2 implements them directly with nn.Dense.

+ +
+ Source code in mindcv/models/layers/squeeze_excite.py +
class SqueezeExciteV2(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    V1 replaces the fc layers with 1x1 convolutions, while V2 implements them directly with nn.Dense.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Dense(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Dense(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=False)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x_se = ops.expand_dims(x_se, -1)
+        x_se = ops.expand_dims(x_se, -1)
+        x = x * x_se
+        return x
+
+
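The practical difference from V1 is only in how the squeeze-and-excite projections are computed: V2 pools with `keep_dims=False`, applies `nn.Dense` to the resulting (N, C) tensor, and expands the gate back to (N, C, 1, 1) before rescaling. A small comparison sketch under the same assumptions as the V1 example above:

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.squeeze_excite import SqueezeExcite, SqueezeExciteV2

x = ms.Tensor(np.random.randn(2, 64, 32, 32), ms.float32)

se_v1 = SqueezeExcite(in_channels=64)    # 1x1 convolutions on the pooled (N, C, 1, 1) map
se_v2 = SqueezeExciteV2(in_channels=64)  # nn.Dense on (N, C), then expand_dims back to (N, C, 1, 1)

# Both variants rescale the input per channel, so the output shape matches the input.
print(se_v1(x).shape, se_v2(x).shape)  # (2, 64, 32, 32) (2, 64, 32, 32)
```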
+ + + + + + + + + \ No newline at end of file diff --git a/reference/models/index.html b/reference/models/index.html new file mode 100644 index 000000000..191f954a4 --- /dev/null +++ b/reference/models/index.html @@ -0,0 +1,40359 @@ + + + + + + + + + + + + + + + + + + + + + + + + models - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Models

+

Create Model

+ + + +
+ + + +

+mindcv.models.model_factory.create_model(model_name, num_classes=1000, pretrained=False, in_channels=3, checkpoint_path='', ema=False, auto_mapping=False, **kwargs) + +

+ + +
+ +

Creates model by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
model_name +
+

The name of model.

+
+

+ + TYPE: + str + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
pretrained +
+

Whether to load the pretrained model. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
checkpoint_path +
+

The path of checkpoint files. Default: "".

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
ema +
+

Whether to use the EMA (exponential moving average) weights. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
auto_mapping +
+

Whether to automatically map the names of checkpoint weights +to the names of model weights when there are differences in names. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/model_factory.py +
def create_model(
+    model_name: str,
+    num_classes: int = 1000,
+    pretrained: bool = False,
+    in_channels: int = 3,
+    checkpoint_path: str = "",
+    ema: bool = False,
+    auto_mapping: bool = False,
+    **kwargs,
+):
+    r"""Creates model by name.
+
+    Args:
+        model_name (str):  The name of model.
+        num_classes (int): The number of classes. Default: 1000.
+        pretrained (bool): Whether to load the pretrained model. Default: False.
+        in_channels (int): The input channels. Default: 3.
+        checkpoint_path (str): The path of checkpoint files. Default: "".
+        ema (bool): Whether to use the EMA (exponential moving average) weights. Default: False.
+        auto_mapping (bool): Whether to automatically map the names of checkpoint weights
+            to the names of model weights when there are differences in names. Default: False.
+    """
+
+    if checkpoint_path != "" and pretrained:
+        raise ValueError("checkpoint_path is mutually exclusive with pretrained")
+
+    model_args = dict(num_classes=num_classes, pretrained=pretrained, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        load_model_checkpoint(model, checkpoint_path, ema, auto_mapping)
+
+    return model
+
+
+
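A usage sketch of the factory. The registered names are assumed to match the `@register_model` factory functions documented in the sections that follow (for example `convit_tiny`), and the checkpoint path is only a placeholder:

```python
from mindcv.models import create_model

# Build a registered model, overriding the classifier size.
net = create_model("convit_tiny", num_classes=10, in_channels=3)

# Load released weights ...
net_pretrained = create_model("convit_tiny", pretrained=True)

# ... or resume from a local checkpoint instead. `pretrained` and `checkpoint_path`
# are mutually exclusive; passing both raises a ValueError.
net_resumed = create_model("convit_tiny", checkpoint_path="/path/to/convit_tiny.ckpt")
```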
+ +

bit

+ + +
+ + + +

+ mindcv.models.bit.BiT_ResNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

BiT_ResNet model class, based on +"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block(Union[Bottleneck]) +
+

block of BiT_ResNetv2.

+
+

+

+
layers(tuple(int)) +
+

number of layers of each stage.

+
+

+

+
wf(int) +
+

width of each layer. Default: 1.

+
+

+

+
num_classes(int) +
+

number of classification classes. Default: 1000.

+
+

+

+
in_channels(int) +
+

number of input channels. Default: 3.

+
+

+

+
groups(int) +
+

number of groups for group conv in blocks. Default: 1.

+
+

+

+
base_width(int) +
+

base width of the per-group hidden channels in blocks. Default: 64.

+
+

+

+
norm(nn.Cell) +
+

normalization layer in blocks. Default: None.

+
+

+

+
+ +
+ Source code in mindcv/models/bit.py +
class BiT_ResNet(nn.Cell):
+    r"""BiT_ResNet model class, based on
+    `"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>`_
+    Args:
+        block(Union[Bottleneck]): block of BiT_ResNetv2.
+        layers(tuple(int)): number of layers of each stage.
+        wf(int): width of each layer. Default: 1.
+        num_classes(int): number of classification classes. Default: 1000.
+        in_channels(int): number of input channels. Default: 3.
+        groups(int): number of groups for group conv in blocks. Default: 1.
+        base_width(int): base width of the per-group hidden channels in blocks. Default: 64.
+        norm(nn.Cell): normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[Bottleneck]],
+        layers: List[int],
+        wf: int = 1,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+
+        if norm is None:
+            norm = nn.GroupNorm
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64 * wf
+        self.groups = groups
+        self.base_with = base_width
+
+        self.conv1 = StdConv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.pad = nn.ConstantPad2d(1, 0)
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
+
+        self.layer1 = self._make_layer(block, 64 * wf, layers[0])
+        self.layer2 = self._make_layer(block, 128 * wf, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256 * wf, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512 * wf, layers[3], stride=2)
+
+        self.gn = norm(32, 2048 * wf)
+        self.relu = nn.ReLU()
+        self.pool = GlobalAvgPooling(keep_dims=True)
+        self.classifier = nn.Conv2d(512 * block.expansion * wf, num_classes, kernel_size=1, has_bias=True)
+
+    def _make_layer(
+        self,
+        block: Type[Union[Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                StdConv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def root(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.pad(x)
+        x = self.max_pool(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.gn(x)
+        x = self.relu(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.root(x)
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        assert x.shape[-2:] == (1, 1)  # We should have no spatial shape left.
+        return x[..., 0, 0]
+
+
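A construction sketch for the backbone itself, assuming `Bottleneck` (the block type referenced by the constructor) can be imported from `mindcv/models/bit.py` alongside the class. BiT-R50x3, for instance, keeps the ResNet-50 layout `[3, 4, 6, 3]` and triples every stage width via `wf=3`:

```python
import numpy as np
import mindspore as ms
from mindcv.models.bit import BiT_ResNet, Bottleneck

net = BiT_ResNet(Bottleneck, [3, 4, 6, 3], wf=3, num_classes=1000)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = net(x)      # the 1x1-conv classifier head is squeezed to (N, num_classes)
print(logits.shape)  # (1, 1000)
```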
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.bit.BiT_ResNet.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/bit.py +
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 101-layer BiT ResNet model. +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 101-layer BiT ResNet model.
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet101"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model. +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 50-layer BiT ResNet model.
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet50"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50x3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model with a width factor of 3. +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
@register_model
+def BiT_resnet50x3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get a 50-layer BiT ResNet model with a width factor of 3.
+    Refer to the base class `models.BiT_ResNet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet50x3"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], wf=3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

cait

+ + +
+ + + +

+ mindcv.models.cait.CaiT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cait.py +
class CaiT(nn.Cell):
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_channels: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: float = 4.,
+                 qkv_bias: bool = False,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = nn.LayerNorm,
+                 act_layer: nn.Cell = nn.GELU,
+                 init_values: float = 1e-4,
+                 depth_token_only: int = 2,
+                 mlp_ratio_clstk: float = 4.0) -> None:
+        super(CaiT, self).__init__()
+        self.num_classes = num_classes
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(image_size=img_size,
+                                      patch_size=patch_size,
+                                      in_chans=in_channels,
+                                      embed_dim=embed_dim)
+
+        num_patches = self.patch_embed.num_patches
+
+        zeros = ops.Zeros()
+        self.cls_token = Parameter(zeros((1, 1, embed_dim), ms.float32))
+        self.pos_embed = Parameter(zeros((1, num_patches, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+
+        self.blocks = []
+        self.blocks_token_only = []
+
+        self.blocks = nn.CellList([
+            LayerScaleBlockSA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=drop_rate,
+                attn_drop_rate=attn_drop_rate,
+                drop_path_rate=dpr[i],
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth)])
+        self.blocks_token_only = nn.CellList([
+            LayerScaleBlockCA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=0.0,
+                attn_drop_rate=0.0,
+                drop_path_rate=0.0,
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth_token_only)])
+
+        self.norm = norm_layer((embed_dim,))
+
+        self.head = nn.Dense(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+        self.pos_embed = init.initializer(TruncatedNormal(sigma=0.02), self.pos_embed.shape, ms.float32)
+        self.cls_token = init.initializer(TruncatedNormal(sigma=0.02), self.cls_token.shape, ms.float32)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = init.initializer(TruncatedNormal(sigma=0.02), m.weight.shape, ms.float32)
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        B = x.shape[0]
+        x = self.patch_embed(x)
+
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+
+        for _, blk in enumerate(self.blocks):
+            x = blk(x)
+        for _, blk in enumerate(self.blocks_token_only):
+            cls_tokens = blk(x, cls_tokens)
+
+        x = ops.concat((cls_tokens, x), axis=1)
+
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
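For orientation, the configuration below mirrors the `cait_xxs24_224` factory further down: patch tokens pass through `depth` self-attention blocks, and the class token only interacts with them in the final `depth_token_only` class-attention blocks. Because the positional embedding is sized from `img_size / patch_size`, the input resolution must match `img_size`. A sketch, assuming `CaiT` is importable from `mindcv.models.cait`:

```python
from functools import partial

import numpy as np
import mindspore as ms
import mindspore.nn as nn
from mindcv.models.cait import CaiT

# Same configuration as cait_xxs24_224: 192-dim tokens, 24 SA blocks, 2 CA blocks.
net = CaiT(img_size=224, patch_size=16, embed_dim=192, depth=24, num_heads=4,
           mlp_ratio=4, qkv_bias=False,
           norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
           init_values=1e-5, depth_token_only=2)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)
```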
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_m36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=36, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m48_448(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_m48_448(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=448, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=48, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_s36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=36, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xs24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_xs24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=288, depth=24, num_heads=6, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xxs24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
@register_model
+def cait_xxs24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=192, depth=24, num_heads=4, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +

cmt

+ + +
+ + + +

+ mindcv.models.cmt.CMT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cmt.py +
class CMT(nn.Cell):
+    def __init__(
+        self,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        embed_dims=None,
+        stem_channel=16,
+        fc_dim=1280,
+        num_heads=None,
+        mlp_ratios=None,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=None,
+        depths=None,
+        qk_ratio=1,
+        sr_ratios=None,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dims[-1]
+        norm_layer = norm_layer or nn.LayerNorm
+
+        self.stem_conv1 = nn.Conv2d(
+            3, stem_channel, kernel_size=3, stride=2, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu1 = nn.GELU()
+        self.stem_norm1 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv2 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu2 = nn.GELU()
+        self.stem_norm2 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv3 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu3 = nn.GELU()
+        self.stem_norm3 = nn.BatchNorm2d(stem_channel)
+
+        self.patch_embed_a = PatchEmbed(
+            img_size=img_size // 2, patch_size=2, in_chans=stem_channel, embed_dim=embed_dims[0])
+        self.patch_embed_b = PatchEmbed(
+            img_size=img_size // 4, patch_size=2, in_chans=embed_dims[0], embed_dim=embed_dims[1])
+        self.patch_embed_c = PatchEmbed(
+            img_size=img_size // 8, patch_size=2, in_chans=embed_dims[1], embed_dim=embed_dims[2])
+        self.patch_embed_d = PatchEmbed(
+            img_size=img_size // 16, patch_size=2, in_chans=embed_dims[2], embed_dim=embed_dims[3])
+
+        self.relative_pos_a = ops.zeros(
+            (num_heads[0], self.patch_embed_a.num_patches,
+             self.patch_embed_a.num_patches // sr_ratios[0] // sr_ratios[0]),
+            mindspore.float32)
+        self.relative_pos_b = ops.zeros(
+            (num_heads[1], self.patch_embed_b.num_patches,
+             self.patch_embed_b.num_patches // sr_ratios[1] // sr_ratios[1]),
+            mindspore.float32)
+        self.relative_pos_c = ops.zeros(
+            (num_heads[2], self.patch_embed_c.num_patches,
+             self.patch_embed_c.num_patches // sr_ratios[2] // sr_ratios[2]),
+            mindspore.float32)
+        self.relative_pos_d = ops.zeros(
+            (num_heads[3], self.patch_embed_d.num_patches,
+             self.patch_embed_d.num_patches // sr_ratios[3] // sr_ratios[3]),
+            mindspore.float32)
+
+        # stochastic depth decay rule
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+        self.blocks_a = nn.CellList([
+            Block(
+                dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        cur += depths[0]
+        self.blocks_b = nn.CellList([
+            Block(
+                dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        cur += depths[1]
+        self.blocks_c = nn.CellList([
+            Block(
+                dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        cur += depths[2]
+        self.blocks_d = nn.CellList([
+            Block(
+                dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+
+        # Classifier head
+        self._fc = nn.Conv2d(
+            embed_dims[-1], fc_dim, kernel_size=1, has_bias=True)
+        self._bn = nn.BatchNorm2d(fc_dim)
+        self._drop = Dropout(p=drop_rate)
+        self.head = nn.Dense(
+            fc_dim, num_classes) if num_classes > 0 else ops.Identity()
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape,
+                                                      cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            elif isinstance(cell, (nn.LayerNorm, nn.BatchNorm2d)):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+
+    def forward_features(self, x):
+        B = x.shape[0]
+        x = self.stem_conv1(x)
+        x = self.stem_relu1(x)
+        x = self.stem_norm1(x)
+
+        x = self.stem_conv2(x)
+        x = self.stem_relu2(x)
+        x = self.stem_norm2(x)
+
+        x = self.stem_conv3(x)
+        x = self.stem_relu3(x)
+        x = self.stem_norm3(x)
+
+        x, (H, W) = self.patch_embed_a(x)
+        for _, blk in enumerate(self.blocks_a):
+            x = blk(x, H, W, self.relative_pos_a)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_b(x)
+        for _, blk in enumerate(self.blocks_b):
+            x = blk(x, H, W, self.relative_pos_b)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_c(x)
+        for _, blk in enumerate(self.blocks_c):
+            x = blk(x, H, W, self.relative_pos_c)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_d(x)
+        for _, blk in enumerate(self.blocks_d):
+            x = blk(x, H, W, self.relative_pos_d)
+
+        B, _, C = x.shape
+
+        x = self._fc(ops.transpose(x, (0, 2, 1)).reshape(B, C, H, W))
+        x = self._bn(x)
+        x = swish(x)
+        x = GlobalAvgPooling()(x)
+        x = self._drop(x)
+        return x
+
+    def forward_head(self, x):
+        x = self.head(x)
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
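Each CMT variant registered below is tied to a fixed input resolution, because the relative-position tensors are sized from `img_size` (cmt_tiny: 160, cmt_xsmall: 192, cmt_small: 224, cmt_base: 256). A usage sketch via the factory, assuming the registered names match the function names:

```python
import numpy as np
import mindspore as ms
from mindcv.models import create_model

net = create_model("cmt_small", num_classes=1000)

# cmt_small is built with img_size=224, so the input must be 224x224.
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)
```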
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-Base

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_base(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Base
+    """
+    default_cfg = default_cfgs["cmt_base"]
+
+    model = CMT(img_size=256, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[76, 152, 304, 608], stem_channel=38, num_heads=[1, 2, 4, 8], depths=[4, 4, 20, 4],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-Small

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_small(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Small
+    """
+    default_cfg = default_cfgs["cmt_small"]
+
+    model = CMT(img_size=224, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[64, 128, 256, 512], stem_channel=32, num_heads=[1, 2, 4, 8], depths=[3, 3, 16, 3],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-tiny

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_tiny(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-tiny
+    """
+    default_cfg = default_cfgs["cmt_tiny"]
+
+    model = CMT(img_size=160, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[46, 92, 184, 368], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[2, 2, 10, 2],
+                mlp_ratios=[3.6, 3.6, 3.6, 3.6], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cmt.cmt_xsmall(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

CMT-XSmall

+ +
+ Source code in mindcv/models/cmt.py +
@register_model
+def cmt_xsmall(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-XSmall
+    """
+    default_cfg = default_cfgs["cmt_xsmall"]
+
+    model = CMT(img_size=192, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[52, 104, 208, 416], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[3, 3, 12, 3],
+                mlp_ratios=[3.8, 3.8, 3.8, 3.8], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

coat

+ + + +
+ + + +

+mindcv.models.coat.coat_lite_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_medium(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_medium']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[128, 256, 320, 512],
+                 serial_depths=[3, 6, 10, 8], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[3, 4, 6, 3], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_lite_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_lite_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_tiny']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 256, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 216, 216, 216],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 320, 320, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.coat.coat_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/coat.py +
@register_model
+def coat_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_tiny']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 152, 152, 152],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
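The variants above differ mainly in `embed_dims` and in whether a parallel branch is attached: the `coat_lite_*` models are serial-only (`parallel_depth=0`), while `coat_tiny`, `coat_mini`, and `coat_small` add six parallel blocks (`parallel_depth=6`). A brief sketch, assuming the registered names match the factory function names:

```python
from mindcv.models import create_model

lite = create_model("coat_lite_tiny", num_classes=100)  # serial-only variant
full = create_model("coat_tiny", num_classes=100)       # serial blocks plus a parallel branch
```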
+
+ +

convit

+ + +
+ + + +

+ mindcv.models.convit.ConViT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ConViT model class, based on +'"Improving Vision Transformers with Soft Convolutional Inductive Biases" +https://arxiv.org/pdf/2103.10697.pdf'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int) + + + DEFAULT: + 1000 + +

+
image_size +
+

images input size. Default: 224.

+
+

+ + TYPE: + int) + + + DEFAULT: + 224 + +

+
patch_size +
+

image patch size. Default: 16.

+
+

+ + TYPE: + int) + + + DEFAULT: + 16 + +

+
embed_dim +
+

embedding dimension in all head. Default: 48.

+
+

+ + TYPE: + int) + + + DEFAULT: + 48 + +

+
num_heads +
+

number of heads. Default: 12.

+
+

+ + TYPE: + int) + + + DEFAULT: + 12 + +

+
drop_rate +
+

dropout rate. Default: 0.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

drop path rate. Default: 0.1.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.1 + +

+
depth +
+

model block depth. Default: 12.

+
+

+ + TYPE: + int) + + + DEFAULT: + 12 + +

+
mlp_ratio +
+

ratio of hidden features in Mlp. Default: 4.

+
+

+ + TYPE: + float) + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

have bias in qkv layers or not. Default: False.

+
+

+ + TYPE: + bool) + + + DEFAULT: + False + +

+
attn_drop_rate +
+

attention layers dropout rate. Default: 0.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.0 + +

+
locality_strength +
+

determines how focused each head is around its attention center. Default: 1.

+
+

+ + TYPE: + float) + + + DEFAULT: + 1.0 + +

+
local_up_to_layer +
+

number of GPSA layers. Default: 10.

+
+

+ + TYPE: + int) + + + DEFAULT: + 10 + +

+
use_pos_embed +
+

whether to use positional embeddings. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
locality_strength(float) +
+

the strength of locality. Default: 1.

+
+

+

+
+ +
+ Source code in mindcv/models/convit.py +
class ConViT(nn.Cell):
+    r"""ConViT model class, based on
+    '"Improving Vision Transformers with Soft Convolutional Inductive Biases"
+    <https://arxiv.org/pdf/2103.10697.pdf>'
+
+    Args:
+        in_channels (int): number of input channels. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        image_size (int) : images input size. Default: 224.
+        patch_size (int) : image patch size. Default: 16.
+        embed_dim (int) : embedding dimension in all head. Default: 48.
+        num_heads (int) : number of heads. Default: 12.
+        drop_rate (float) : dropout rate. Default: 0.
+        drop_path_rate (float) : drop path rate. Default: 0.1.
+        depth (int) : model block depth. Default: 12.
+        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.
+        qkv_bias (bool) : have bias in qkv layers or not. Default: False.
+        attn_drop_rate (float) : attention layers dropout rate. Default: 0.
+        locality_strength (float) : determines how focused each head is around its attention center. Default: 1.
+        local_up_to_layer (int) : number of GPSA layers. Default: 10.
+        use_pos_embed (bool): whether to use positional embeddings. Default: True.
+        locality_strength(float): the strength of locality. Default: 1.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        image_size: int = 224,
+        patch_size: int = 16,
+        embed_dim: int = 48,
+        num_heads: int = 12,
+        drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        depth: int = 12,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        attn_drop_rate: float = 0.0,
+        local_up_to_layer: int = 10,
+        use_pos_embed: bool = True,
+        locality_strength: float = 1.0,
+    ) -> None:
+        super().__init__()
+
+        self.local_up_to_layer = local_up_to_layer
+        self.use_pos_embed = use_pos_embed
+        self.num_heads = num_heads
+        self.locality_strength = locality_strength
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim)
+        self.num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(ops.Zeros()((1, 1, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        if self.use_pos_embed:
+            self.pos_embed = Parameter(ops.Zeros()((1, self.num_patches, embed_dim), ms.float32))
+            self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.pos_embed.data.shape))
+
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, depth)]
+        self.blocks = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=True)
+            if i < local_up_to_layer else
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=False)
+            for i in range(depth)])
+        self.norm = nn.LayerNorm((embed_dim,))
+
+        self.classifier = nn.Dense(in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else Identity()
+        self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.cls_token.data.shape))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.data.shape))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Constant(0), cell.bias.shape))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.Constant(1), cell.gamma.shape))
+                cell.beta.set_data(init.initializer(init.Constant(0), cell.beta.shape))
+        # local init
+        for i in range(self.local_up_to_layer):
+            self.blocks[i].attn.v.weight.set_data(ops.eye(self.embed_dim, self.embed_dim, ms.float32), slice_shape=True)
+            locality_distance = 1
+            kernel_size = int(self.num_heads**0.5)
+            center = (kernel_size - 1) / 2 if kernel_size % 2 == 0 else kernel_size // 2
+            pos_weight_data = self.blocks[i].attn.pos_proj.weight.data
+            for h1 in range(kernel_size):
+                for h2 in range(kernel_size):
+                    position = h1 + kernel_size * h2
+                    pos_weight_data[position, 2] = -1
+                    pos_weight_data[position, 1] = 2 * (h1 - center) * locality_distance
+                    pos_weight_data[position, 0] = 2 * (h2 - center) * locality_distance
+            pos_weight_data = pos_weight_data * self.locality_strength
+            self.blocks[i].attn.pos_proj.weight.set_data(pos_weight_data)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.use_pos_embed:
+            x = x + self.pos_embed
+        x = self.pos_drop(x)
+        cls_tokens = ops.tile(self.cls_token, (x.shape[0], 1, 1))
+        for u, blk in enumerate(self.blocks):
+            if u == self.local_up_to_layer:
+                x = ops.Cast()(x, cls_tokens.dtype)
+                x = ops.concat((cls_tokens, x), 1)
+            x = blk(x)
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
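The key architectural knob is `local_up_to_layer`: the first blocks use gated positional self-attention (`use_gpsa=True`) on patch tokens only, and the class token is concatenated just before the remaining vanilla self-attention blocks. A construction sketch with the same width and heads as `convit_tiny` but only 8 GPSA layers, assuming `ConViT` is importable from `mindcv.models.convit`:

```python
import numpy as np
import mindspore as ms
from mindcv.models.convit import ConViT

net = ConViT(embed_dim=192, num_heads=4, depth=12,
             local_up_to_layer=8, locality_strength=1.0, num_classes=1000)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)
```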
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT base model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=768, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_base_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT base+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_base_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=1024, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT small model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=432, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_small_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT small+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_small_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=576, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT tiny model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=192, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.convit.convit_tiny_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ConViT tiny+ model +Refer to the base class "models.ConViT" for more details.

+ +
+ Source code in mindcv/models/convit.py +
@register_model
+def convit_tiny_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=256, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

convnext

+ + +
+ + + +

+ mindcv.models.convnext.ConvNeXt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ConvNeXt and ConvNeXt V2 model class, based on +"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545>_ and +"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of input channels.

+
+

+ + TYPE: + int + +

+
num_classes +
+

dim of the classes predicted.

+
+

+ + TYPE: + int + +

+
depths +
+

the depths of each layer.

+
+

+ + TYPE: + List[int] + +

+
dims +
+

the middle dim of each layer.

+
+

+ + TYPE: + List[int] + +

+
drop_path_rate +
+

the rate of droppath. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
layer_scale_init_value +
+

the parameter of init for the classifier. Default: 1e-6.

+
+

+ + TYPE: + float + + + DEFAULT: + 1e-06 + +

+
head_init_scale +
+

the parameter of init for the head. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
use_grn +
+

If True, use Global Response Normalization in each block. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/convnext.py +
156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
class ConvNeXt(nn.Cell):
+    r"""ConvNeXt and ConvNeXt V2 model class, based on
+    `"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545>`_ and
+    `"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>`_
+
+    Args:
+        in_channels: dim of the input channel.
+        num_classes: dim of the classes predicted.
+        depths: the depths of each layer.
+        dims: the middle dim of each layer.
+        drop_path_rate: the rate of droppath. Default: 0.0.
+        layer_scale_init_value: the parameter of init for the classifier. Default: 1e-6.
+        head_init_scale: the parameter of init for the head. Default: 1.0.
+        use_grn: If True, use Global Response Normalization in each block. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        num_classes: int,
+        depths: List[int],
+        dims: List[int],
+        drop_path_rate: float = 0.0,
+        layer_scale_init_value: float = 1e-6,
+        head_init_scale: float = 1.0,
+        use_grn: bool = False,
+    ):
+        super().__init__()
+
+        downsample_layers = []  # stem and 3 intermediate down_sampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4, has_bias=True),
+            ConvNextLayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                ConvNextLayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            downsample_layers.append(downsample_layer)
+
+        total_reduction = 4
+        self.feature_info = []
+        self.flatten_sequential = True
+
+        stages = []  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            blocks = []
+            for j in range(depths[i]):
+                blocks.append(Block(dim=dims[i], drop_path=dp_rates[cur + j],
+                                    layer_scale_init_value=layer_scale_init_value, use_grn=use_grn))
+            stage = nn.SequentialCell(blocks)
+            stages.append(stage)
+            cur += depths[i]
+
+            if i > 0:
+                total_reduction *= 2
+            self.feature_info.append(dict(chs=dims[i], reduction=total_reduction, name=f'feature.{i * 2 + 1}'))
+
+        self.feature = nn.SequentialCell([
+            downsample_layers[0],
+            stages[0],
+            downsample_layers[1],
+            stages[1],
+            downsample_layers[2],
+            stages[2],
+            downsample_layers[3],
+            stages[3]
+        ])
+        self.norm = ConvNextLayerNorm((dims[-1],), epsilon=1e-6)  # final norm layer
+        self.classifier = nn.Dense(dims[-1], num_classes)  # classifier
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+        self.classifier.weight.set_data(self.classifier.weight * self.head_init_scale)
+        self.classifier.bias.set_data(self.classifier.bias * self.head_init_scale)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.feature(x)
+        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
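A minimal sketch (assumptions: MindSpore PyNative mode and a 224x224 input) showing how the class above can be constructed directly with the tiny configuration used by convnext_tiny below, and how forward_features/forward_head split the computation.

import numpy as np
import mindspore as ms
from mindcv.models.convnext import ConvNeXt

ms.set_context(mode=ms.PYNATIVE_MODE)

net = ConvNeXt(in_channels=3, num_classes=1000,
               depths=[3, 3, 9, 3], dims=[96, 192, 384, 768])

x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
feats = net.forward_features(x)   # pooled features of shape (1, 768)
logits = net.forward_head(feats)  # classifier output of shape (1, 1000)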
mindcv.models.convnext.convnext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt base model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 287-296):
@register_model
+def convnext_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_base"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnext_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt large model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 299-308):
@register_model
+def convnext_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_large"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt small model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 275-284):
@register_model
+def convnext_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt small model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_small"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnext_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt tiny model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 263-272):
@register_model
+def convnext_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_tiny"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnext_xlarge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt xlarge model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 311-320):
@register_model
+def convnext_xlarge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt xlarge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_xlarge"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_atto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 atto model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 323-331):
@register_model
+def convnextv2_atto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 atto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_atto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[40, 80, 160, 320], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 base model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 378-386):
@register_model
+def convnextv2_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_base"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[128, 256, 512, 1024], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_femto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 femto model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 334-342):
@register_model
+def convnextv2_femto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 femto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_femto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[48, 96, 192, 384], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_huge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 huge model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 400-408):
@register_model
+def convnextv2_huge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 huge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_huge"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[352, 704, 1408, 2816], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 large model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 389-397):
@register_model
+def convnextv2_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_large"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[192, 384, 768, 1536], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_nano(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 nano model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 356-364):
@register_model
+def convnextv2_nano(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 nano model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_nano"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 8, 2],
+                      dims=[80, 160, 320, 640], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_pico(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 pico model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 345-353):
@register_model
+def convnextv2_pico(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 pico model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_pico"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[64, 128, 256, 512], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
mindcv.models.convnext.convnextv2_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ConvNeXt_v2 tiny model. Refer to the base class `models.ConvNeXt` for more details.

Source code in mindcv/models/convnext.py (lines 367-375):
@register_model
+def convnextv2_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_tiny"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3],
+                      dims=[96, 192, 384, 768], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
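The @register_model decorator on each builder above adds it to MindCV's model registry, so the variants can also be created by name. A minimal sketch, assuming create_model is exposed as in MindCV's public API:

from mindcv.models import create_model

# Name-based creation; num_classes and in_channels are forwarded to the builder.
net = create_model("convnextv2_tiny", pretrained=False, num_classes=10, in_channels=3)
print(type(net).__name__)  # ConvNeXt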

crossvit

mindcv.models.crossvit.crossvit_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/crossvit.py (lines 466-475):
@register_model
+def crossvit_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[192, 384], depth=[[1, 5, 0], [1, 5, 0], [1, 5, 0]],
+                              num_heads=[6, 6], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_15"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
mindcv.models.crossvit.crossvit_18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/crossvit.py (lines 478-487):
@register_model
+def crossvit_18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[224, 448], depth=[[1, 6, 0], [1, 6, 0], [1, 6, 0]],
+                              num_heads=[7, 7], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_18"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
mindcv.models.crossvit.crossvit_9(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/crossvit.py (lines 454-463):
@register_model
+def crossvit_9(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[128, 256], depth=[[1, 3, 0], [1, 3, 0], [1, 3, 0]],
+                              num_heads=[4, 4], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_9"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
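The CrossViT builders above carry no docstrings; for orientation, a minimal usage sketch. The 240x240 input resolution follows the two-branch img_size=[240, 224] setting and is an assumption here; check your data pipeline configuration.

import numpy as np
import mindspore as ms
from mindcv.models.crossvit import crossvit_9

net = crossvit_9(pretrained=False, num_classes=1000, in_channels=3)
x = ms.Tensor(np.random.randn(1, 3, 240, 240).astype(np.float32))
logits = net(x)
print(logits.shape)  # expected (1, 1000)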

densenet

mindcv.models.densenet.DenseNet

Bases: nn.Cell

Densenet-BC model class, based on
"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>.

PARAMETERS:
- growth_rate (int): how many filters to add each layer (`k` in the paper). Default: 32.
- block_config (Tuple[int, int, int, int]): how many layers in each pooling block. Default: (6, 12, 24, 16).
- num_init_features (int): number of filters in the first Conv2d. Default: 64.
- bn_size (int): multiplicative factor for number of bottleneck layers (i.e. bn_size * k features in the bottleneck layer). Default: 4.
- drop_rate (float): dropout rate after each dense layer. Default: 0.0.
- in_channels (int): number of input channels. Default: 3.
- num_classes (int): number of classification classes. Default: 1000.

Source code in mindcv/models/densenet.py (lines 126-222):
class DenseNet(nn.Cell):
+    r"""Densenet-BC model class, based on
+    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
+
+    Args:
+        growth_rate: how many filters to add each layer (`k` in paper). Default: 32.
+        block_config: how many layers in each pooling block. Default: (6, 12, 24, 16).
+        num_init_features: number of filters in the first Conv2d. Default: 64.
+        bn_size (int): multiplicative factor for number of bottleneck layers
+          (i.e. bn_size * k features in the bottleneck layer). Default: 4.
+        drop_rate: dropout rate after each dense layer. Default: 0.
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        growth_rate: int = 32,
+        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
+        num_init_features: int = 64,
+        bn_size: int = 4,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        layers = OrderedDict()
+        # first Conv2d
+        num_features = num_init_features
+        layers["conv0"] = nn.Conv2d(in_channels, num_features, kernel_size=7, stride=2, pad_mode="pad", padding=3)
+        layers["norm0"] = nn.BatchNorm2d(num_features)
+        layers["relu0"] = nn.ReLU()
+        layers["pool0"] = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        ])
+
+        # DenseBlock
+        for i, num_layers in enumerate(block_config):
+            block = _DenseBlock(
+                num_layers=num_layers,
+                num_input_features=num_features,
+                bn_size=bn_size,
+                growth_rate=growth_rate,
+                drop_rate=drop_rate,
+            )
+            layers[f"denseblock{i + 1}"] = block
+            num_features += num_layers * growth_rate
+            if i != len(block_config) - 1:
+                transition = _Transition(num_features, num_features // 2)
+                layers[f"transition{i + 1}"] = transition
+                num_features = num_features // 2
+
+        # final bn+ReLU
+        layers["norm5"] = nn.BatchNorm2d(num_features)
+        layers["relu5"] = nn.ReLU()
+
+        self.num_features = num_features
+        self.features = nn.SequentialCell(layers)
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
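A minimal sketch (not from the source) showing that the constructor arguments documented above can define custom variants; the configuration values below are hypothetical and chosen only for illustration.

from mindcv.models.densenet import DenseNet

# Hypothetical narrow DenseNet: growth rate 16, shallower blocks,
# dropout after each dense layer, 100 output classes.
tiny_densenet = DenseNet(growth_rate=16, block_config=(4, 8, 12, 8),
                         num_init_features=32, drop_rate=0.2,
                         in_channels=3, num_classes=100)
print(tiny_densenet.num_features)  # channel count fed to the classifier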
mindcv.models.densenet.densenet121(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 121 layers DenseNet model. Refer to the base class `models.DenseNet` for more details.

Source code in mindcv/models/densenet.py (lines 225-236):
@register_model
+def densenet121(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 121 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet121"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.densenet.densenet161(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 161 layers DenseNet model. Refer to the base class `models.DenseNet` for more details.

Source code in mindcv/models/densenet.py (lines 239-250):
@register_model
+def densenet161(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 161 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet161"]
+    model = DenseNet(growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.densenet.densenet169(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 169 layers DenseNet model. Refer to the base class `models.DenseNet` for more details.

Source code in mindcv/models/densenet.py (lines 253-264):
@register_model
+def densenet169(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 169 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet169"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 32, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.densenet.densenet201(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 201 layers DenseNet model. Refer to the base class `models.DenseNet` for more details.

Source code in mindcv/models/densenet.py (lines 267-278):
@register_model
+def densenet201(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 201 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet201"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
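A minimal usage sketch (not from the source): pretrained=True makes each builder call load_pretrained with the checkpoint listed in default_cfgs, which requires network access; pretrained=False builds a randomly initialized network, as below.

import numpy as np
import mindspore as ms
from mindcv.models.densenet import densenet121

net = densenet121(pretrained=False, num_classes=1000, in_channels=3)
x = ms.Tensor(np.random.randn(2, 3, 224, 224).astype(np.float32))
logits = net(x)
print(logits.shape)  # (2, 1000)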

dpn

mindcv.models.dpn.DPN

Bases: nn.Cell

DPN model class, based on
"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>.

PARAMETERS:
- num_init_channel (int): output channel of the first (stem) block. Default: 64.
- k_r (int): reference width used to compute the bottleneck channels of each stage. Default: 96.
- g (int): number of groups in the grouped conv2d. Default: 32.
- k_sec (Tuple[int, int, int, int]): number of blocks in each of the four stages. Default: (3, 4, 20, 3).
- inc_sec (Tuple[int, int, int, int]): dense-path channel increment of each stage. Default: (16, 32, 24, 128).
- in_channels (int): number of input channels. Default: 3.
- num_classes (int): number of classification classes. Default: 1000.

Source code in mindcv/models/dpn.py (lines 140-259):
class DPN(nn.Cell):
+    r"""DPN model class, based on
+    `"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>`_
+
+    Args:
+        num_init_channel: int type, the output channel of first blocks. Default: 64.
+        k_r: int type, the first channel of each stage. Default: 96.
+        g: int type,number of group in the conv2d. Default: 32.
+        k_sec Tuple[int]: multiplicative factor for number of bottleneck layers. Default: 4.
+        inc_sec Tuple[int]: the first output channel in each stage. Default: (16, 32, 24, 128).
+        in_channels: int type, number of input channels. Default: 3.
+        num_classes: int type, number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        num_init_channel: int = 64,
+        k_r: int = 96,
+        g: int = 32,
+        k_sec: Tuple[int, int, int, int] = (3, 4, 20, 3),
+        inc_sec: Tuple[int, int, int, int] = (16, 32, 24, 128),
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ):
+        super().__init__()
+        blocks = OrderedDict()
+
+        # conv1
+        blocks["conv1"] = nn.SequentialCell(OrderedDict([
+            ("conv", nn.Conv2d(in_channels, num_init_channel, kernel_size=7, stride=2, pad_mode="pad", padding=3)),
+            ("norm", nn.BatchNorm2d(num_init_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+            ("maxpool", nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")),
+        ]))
+
+        # conv2
+        bw = 256
+        inc = inc_sec[0]
+        r = int((k_r * bw) / 256)
+        blocks["conv2_1"] = DualPathBlock(num_init_channel, r, r, bw, inc, g, "proj", False)
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[0] + 1):
+            blocks[f"conv2_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv3
+        bw = 512
+        inc = inc_sec[1]
+        r = int((k_r * bw) / 256)
+        blocks["conv3_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[1] + 1):
+            blocks[f"conv3_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv4
+        bw = 1024
+        inc = inc_sec[2]
+        r = int((k_r * bw) / 256)
+        blocks["conv4_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[2] + 1):
+            blocks[f"conv4_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv5
+        bw = 2048
+        inc = inc_sec[3]
+        r = int((k_r * bw) / 256)
+        blocks["conv5_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[3] + 1):
+            blocks[f"conv5_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        self.features = nn.SequentialCell(blocks)
+        self.conv5_x = nn.SequentialCell(OrderedDict([
+            ("norm", nn.BatchNorm2d(in_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+        ]))
+        self.avgpool = GlobalAvgPooling()
+        self.classifier = nn.Dense(in_channel, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_feature(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        x = ops.concat(x, axis=1)
+        x = self.conv5_x(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_feature(x)
+        x = self.forward_head(x)
+        return x
+
+
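A minimal sketch (not from the source): the constructor defaults reproduce the dpn92 configuration registered below, and forward_feature concatenates the residual and dense paths before the final BN-ReLU. PyNative mode and a 224x224 input are assumed.

import numpy as np
import mindspore as ms
from mindcv.models.dpn import DPN

# Defaults match dpn92: stem width 64, k_r=96, 32 groups, stages (3, 4, 20, 3).
net = DPN()
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
feat = net.forward_feature(x)   # concatenated dual-path feature map, NCHW
out = net.forward_head(feat)    # (1, 1000)
print(feat.shape, out.shape)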
mindcv.models.dpn.dpn107(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 107 layers DPN model. Refer to the base class `models.DPN` for more details.

Source code in mindcv/models/dpn.py (lines 304-315):
@register_model
+def dpn107(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 107 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn107"]
+    model = DPN(num_init_channel=128, k_r=200, g=50, k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.dpn.dpn131(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 131 layers DPN model. Refer to the base class `models.DPN` for more details.

Source code in mindcv/models/dpn.py (lines 290-301):
@register_model
+def dpn131(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 131 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn131"]
+    model = DPN(num_init_channel=128, k_r=160, g=40, k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.dpn.dpn92(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 92 layers DPN model. Refer to the base class `models.DPN` for more details.

Source code in mindcv/models/dpn.py (lines 262-273):
@register_model
+def dpn92(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 92 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn92"]
+    model = DPN(num_init_channel=64, k_r=96, g=32, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
mindcv.models.dpn.dpn98(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 98 layers DPN model. Refer to the base class `models.DPN` for more details.

Source code in mindcv/models/dpn.py (lines 276-287):
@register_model
+def dpn98(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 98 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn98"]
+    model = DPN(num_init_channel=96, k_r=160, g=40, k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
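The four registered variants differ only in the constructor arguments shown above. A minimal sketch that builds each one by name through the registry (create_model assumed available as in MindCV's public API) and prints a rough parameter count:

from mindcv.models import create_model

for name in ("dpn92", "dpn98", "dpn107", "dpn131"):
    net = create_model(name, pretrained=False)
    n_params = sum(p.size for p in net.trainable_params())
    print(f"{name}: {n_params / 1e6:.1f} M parameters")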

edgenext

mindcv.models.edgenext.EdgeNeXt

Bases: nn.Cell

EdgeNeXt model class, based on
"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>.

PARAMETERS:
- in_channels: number of input channels. Default: 3 (the constructor argument is named `in_chans`).
- num_classes: number of classification classes. Default: 1000.
- depths: the depths of each stage. Default: [3, 3, 9, 3].
- dims: the middle dim of each stage. Default: [24, 48, 88, 168].
- global_block: number of global blocks per stage. Default: [0, 0, 0, 3].
- global_block_type: type of global block per stage. Default: ["None", "None", "None", "SDTA"].
- drop_path_rate: stochastic depth rate. Default: 0.0.
- layer_scale_init_value: value of layer scale initialization. Default: 1e-6.
- head_init_scale: scale of head initialization. Default: 1.0.
- expan_ratio: ratio of expansion. Default: 4.
- kernel_sizes: kernel sizes of different stages. Default: [7, 7, 7, 7].
- heads: number of attention heads. Default: [8, 8, 8, 8].
- use_pos_embd_xca: whether to use position embedding in XCA per stage. Default: [False, False, False, False].
- use_pos_embd_global: whether to use position embedding globally. Default: False.
- d2_scales: scales of splitting channels. Default: [2, 3, 4, 5].

Source code in mindcv/models/edgenext.py (lines 296-400):
class EdgeNeXt(nn.Cell):
+    r"""EdgeNeXt model class, based on
+    `"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>`_
+
+    Args:
+        in_channels: number of input channels. Default: 3
+        num_classes: number of classification classes. Default: 1000
+        depths: the depths of each layer. Default: [0, 0, 0, 3]
+        dims: the middle dim of each layer. Default: [24, 48, 88, 168]
+        global_block: number of global block. Default: [0, 0, 0, 3]
+        global_block_type: type of global block. Default: ['None', 'None', 'None', 'SDTA']
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: value of layer scale initialization. Default: 1e-6
+        head_init_scale: scale of head initialization. Default: 1.
+        expan_ratio: ratio of expansion. Default: 4
+        kernel_sizes: kernel sizes of different stages. Default: [7, 7, 7, 7]
+        heads: number of attention heads. Default: [8, 8, 8, 8]
+        use_pos_embd_xca: use position embedding in xca or not. Default: [False, False, False, False]
+        use_pos_embd_global: use position embedding globally or not. Default: False
+        d2_scales: scales of splitting channels
+    """
+    def __init__(self, in_chans=3, num_classes=1000,
+                 depths=[3, 3, 9, 3], dims=[24, 48, 88, 168],
+                 global_block=[0, 0, 0, 3], global_block_type=["None", "None", "None", "SDTA"],
+                 drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1., expan_ratio=4,
+                 kernel_sizes=[7, 7, 7, 7], heads=[8, 8, 8, 8], use_pos_embd_xca=[False, False, False, False],
+                 use_pos_embd_global=False, d2_scales=[2, 3, 4, 5], **kwargs):
+        super().__init__()
+        for g in global_block_type:
+            assert g in ["None", "SDTA"]
+        if use_pos_embd_global:
+            self.pos_embd = PositionalEncodingFourier(dim=dims[0])
+        else:
+            self.pos_embd = None
+        self.downsample_layers = nn.CellList()  # stem and 3 intermediate downsampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4, has_bias=True),
+            LayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        self.downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                LayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            self.downsample_layers.append(downsample_layer)
+
+        self.stages = nn.CellList()  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            stage_blocks = []
+            for j in range(depths[i]):
+                if j > depths[i] - global_block[i] - 1:
+                    if global_block_type[i] == "SDTA":
+                        stage_blocks.append(SDTAEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                        expan_ratio=expan_ratio, scales=d2_scales[i],
+                                                        use_pos_emb=use_pos_embd_xca[i], num_heads=heads[i]))
+                    else:
+                        raise NotImplementedError
+                else:
+                    stage_blocks.append(ConvEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                    layer_scale_init_value=layer_scale_init_value,
+                                                    expan_ratio=expan_ratio, kernel_size=kernel_sizes[i]))
+
+            self.stages.append(nn.SequentialCell(*stage_blocks))
+            cur += depths[i]
+        self.norm = nn.LayerNorm((dims[-1],), epsilon=1e-6)  # Final norm layer
+        self.head = nn.Dense(dims[-1], num_classes)
+
+        # self.head_dropout = Dropout(kwargs["classifier_dropout"])
+        self.head_dropout = Dropout(p=0.0)
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, (nn.LayerNorm)):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+        self.head.weight.set_data(self.head.weight * self.head_init_scale)
+        self.head.bias.set_data(self.head.bias * self.head_init_scale)
+
+    def forward_features(self, x):
+        x = self.downsample_layers[0](x)
+        x = self.stages[0](x)
+        if self.pos_embd is not None:
+            B, C, H, W = x.shape
+            x = x + self.pos_embd(B, H, W)
+        for i in range(1, 4):
+            x = self.downsample_layers[i](x)
+            x = self.stages[i](x)
+        return self.norm(x.mean([-2, -1]))  # Global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(self.head_dropout(x))
+        return x
+
+
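A minimal sketch (not from the source): the class can be constructed directly; note that its first argument is named in_chans, unlike the in_channels keyword accepted by the registered builders below. The 256x256 resolution is an assumption; any input compatible with the overall /32 downsampling works.

import numpy as np
import mindspore as ms
from mindcv.models.edgenext import EdgeNeXt

# Defaults: depths [3, 3, 9, 3], dims [24, 48, 88, 168], SDTA blocks in the last stage.
net = EdgeNeXt(in_chans=3, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 256, 256).astype(np.float32))
logits = net(x)
print(logits.shape)  # (1, 1000)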
mindcv.models.edgenext.edgenext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get edgenext_base model. Refer to the base class `models.EdgeNeXt` for more details.

Source code in mindcv/models/edgenext.py (lines 472-491):
@register_model
+def edgenext_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_base model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_base"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[80, 160, 288, 584],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
mindcv.models.edgenext.edgenext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get edgenext_small model. Refer to the base class `models.EdgeNeXt` for more details.

Source code in mindcv/models/edgenext.py (lines 450-469):
@register_model
+def edgenext_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[48, 96, 160, 304],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
mindcv.models.edgenext.edgenext_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get edgenext_x_small model. Refer to the base class `models.EdgeNeXt` for more details.

Source code in mindcv/models/edgenext.py (lines 427-447):
@register_model
+def edgenext_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_x_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_x_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[32, 64, 100, 192],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
mindcv.models.edgenext.edgenext_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get edgenext_xx_small model. Refer to the base class `models.EdgeNeXt` for more details.

Source code in mindcv/models/edgenext.py (lines 403-424):
@register_model
+def edgenext_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_xx_small model.
+        Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_xx_small"]
+    model = EdgeNeXt(
+        depths=[2, 2, 6, 2],
+        dims=[24, 48, 88, 168],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=['None', 'SDTA', 'SDTA', 'SDTA'],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
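A minimal sketch, assuming list_models and create_model are exposed by the registry as in MindCV's public API, to enumerate the EdgeNeXt variants documented above and build one of them:

from mindcv.models import create_model, list_models

print(list_models("edgenext*"))  # e.g. ['edgenext_base', 'edgenext_small', ...]
net = create_model("edgenext_small", pretrained=False, num_classes=1000)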

efficientnet

mindcv.models.efficientnet.EfficientNet

Bases: nn.Cell

EfficientNet architecture, based on
"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" <https://arxiv.org/abs/1905.11946>.

PARAMETERS:
- arch (str): the name of the model.
- dropout_rate (float): the dropout rate of efficientnet.
- width_mult (float): the ratio of the channel. Default: 1.0.
- depth_mult (float): the ratio of num_layers. Default: 1.0.
- in_channels (int): the number of input channels. Default: 3.
- num_classes (int): the number of classes. Default: 1000.
- inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]], optional): the settings of the blocks. Default: None.
- drop_path_prob (float): the drop path rate of MBConv. Default: 0.2.
- norm_layer (nn.Cell, optional): the normalization layer. Default: None.

INPUTS:
- x (Tensor): tensor of shape (N, C_in, H_in, W_in).

OUTPUTS:
- Tensor of shape (N, 1000).
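A minimal sketch of the input/output contract above (assumptions: the "efficientnet_b0" arch string and a 224x224 input; in practice the per-variant builders pass these settings for you):

import numpy as np
import mindspore as ms
from mindcv.models.efficientnet import EfficientNet

# width_mult/depth_mult of 1.0 correspond to the b0 scaling.
net = EfficientNet("efficientnet_b0", dropout_rate=0.2, width_mult=1.0, depth_mult=1.0)
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)  # Tensor of shape (1, 1000)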
Source code in mindcv/models/efficientnet.py (lines 275-476):
class EfficientNet(nn.Cell):
+    """
+    EfficientNet architecture.
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        arch (str): The name of the model.
+        dropout_rate (float): The dropout rate of efficientnet.
+        width_mult (float): The ratio of the channel. Default: 1.0.
+        depth_mult (float): The ratio of num_layers. Default: 1.0.
+        in_channels (int): The input channels. Default: 3.
+        num_classes (int): The number of class. Default: 1000.
+        inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]], optional): The settings of block.
+            Default: None.
+        drop_path_prob (float): The drop path rate of MBConv. Default: 0.2.
+        norm_layer (nn.Cell, optional): The normalization layer. Default: None.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 1000)`.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        dropout_rate: float,
+        width_mult: float = 1.0,
+        depth_mult: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        inverted_residual_setting: Optional[Sequence[Union[MBConvConfig, FusedMBConvConfig]]] = None,
+        drop_path_prob: float = 0.2,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        self.last_channel = None
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+            if width_mult >= 1.6:
+                norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.99)
+
+        layers: List[nn.Cell] = []
+
+        if not inverted_residual_setting:
+            if arch.startswith("efficientnet_b"):
+                bneck_conf = partial(MBConvConfig, width_cnf=width_mult, depth_cnf=depth_mult)
+                inverted_residual_setting = [
+                    bneck_conf(1, 3, 1, 32, 16, 1),
+                    bneck_conf(6, 3, 2, 16, 24, 2),
+                    bneck_conf(6, 5, 2, 24, 40, 2),
+                    bneck_conf(6, 3, 2, 40, 80, 3),
+                    bneck_conf(6, 5, 1, 80, 112, 3),
+                    bneck_conf(6, 5, 2, 112, 192, 4),
+                    bneck_conf(6, 3, 1, 192, 320, 1),
+                ]
+            elif arch.startswith("efficientnet_v2_s"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 2),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 4),
+                    FusedMBConvConfig(4, 3, 2, 48, 64, 4),
+                    MBConvConfig(4, 3, 2, 64, 128, 6),
+                    MBConvConfig(6, 3, 1, 128, 160, 9),
+                    MBConvConfig(6, 3, 2, 160, 256, 15),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_m"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 3),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 5),
+                    FusedMBConvConfig(4, 3, 2, 48, 80, 5),
+                    MBConvConfig(4, 3, 2, 80, 160, 7),
+                    MBConvConfig(6, 3, 1, 160, 176, 14),
+                    MBConvConfig(6, 3, 2, 176, 304, 18),
+                    MBConvConfig(6, 3, 1, 304, 512, 5),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_l"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 7),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 7),
+                    MBConvConfig(4, 3, 2, 96, 192, 10),
+                    MBConvConfig(6, 3, 1, 192, 224, 19),
+                    MBConvConfig(6, 3, 2, 224, 384, 25),
+                    MBConvConfig(6, 3, 1, 384, 640, 7),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_xl"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 8),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 8),
+                    MBConvConfig(4, 3, 2, 96, 192, 16),
+                    MBConvConfig(6, 3, 1, 192, 256, 24),
+                    MBConvConfig(6, 3, 2, 256, 512, 32),
+                    MBConvConfig(6, 3, 1, 512, 640, 8),
+                ]
+                self.last_channel = 1280
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.extend([
+            nn.Conv2d(in_channels, firstconv_output_channels, kernel_size=3, stride=2),
+            norm_layer(firstconv_output_channels),
+            Swish(),
+        ])
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=firstconv_output_channels, reduction=total_reduction,
+                                  name=f'features.{len(layers) - 1}')]
+
+        # building MBConv blocks
+        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
+        stage_block_id = 0
+
+        # cnf is the settings of block
+        for cnf in inverted_residual_setting:
+            stage: List[nn.Cell] = []
+
+            # cnf.num_layers is the num of the same block
+            for _ in range(cnf.num_layers):
+                # copy to avoid modifications. shallow copy is enough
+                block_cnf = copy.copy(cnf)
+
+                block = MBConv
+
+                if "FusedMBConvConfig" in str(type(block_cnf)):
+                    block = FusedMBConv
+
+                # overwrite info if not the first conv in the stage
+                if stage:
+                    block_cnf.input_channels = block_cnf.out_channels
+                    block_cnf.stride = 1
+
+                # adjust dropout rate of blocks based on the depth of the stage block
+                sd_prob = drop_path_prob * float(stage_block_id) / total_stage_blocks
+
+                total_reduction *= block_cnf.stride
+
+                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage_block_id += 1
+
+            layers.append(nn.SequentialCell(stage))
+
+            self.feature_info.append(dict(chs=cnf.out_channels, reduction=total_reduction,
+                                          name=f'features.{len(layers) - 1}'))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = self.last_channel if self.last_channel is not None else 4 * lastconv_input_channels
+        layers.extend([
+            nn.Conv2d(lastconv_input_channels, lastconv_output_channels, kernel_size=1),
+            norm_layer(lastconv_output_channels),
+            Swish(),
+        ])
+
+        self.feature_info.append(dict(chs=lastconv_output_channels, reduction=total_reduction,
+                                      name=f'features.{len(layers) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(layers)
+        self.avgpool = GlobalAvgPooling()
+        self.dropout = Dropout(p=dropout_rate)
+        self.mlp_head = nn.Dense(lastconv_output_channels, num_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+
+        x = self.avgpool(x)
+
+        if self.training:
+            x = self.dropout(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        """construct"""
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                init_range = 1.0 / np.sqrt(cell.weight.shape[0])
+                cell.weight.set_data(weight_init.initializer(Uniform(init_range), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            if isinstance(cell, nn.Conv2d):
+                out_channel, _, kernel_size_h, kernel_size_w = cell.weight.shape
+                stddev = np.sqrt(2 / int(out_channel * kernel_size_h * kernel_size_w))
+                cell.weight.set_data(
+                    weight_init.initializer(Normal(sigma=stddev), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+
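A minimal end-to-end sketch of the class above (assuming MindSpore and mindcv are installed; the dummy input and the printed shape are illustrative only):

import numpy as np
import mindspore as ms
from mindcv.models.efficientnet import EfficientNet

# Baseline B0 configuration: `arch` selects the MBConv settings built in __init__,
# and dropout_rate=0.2 matches the value the efficientnet_b0 factory below passes in.
net = EfficientNet(arch="efficientnet_b0", dropout_rate=0.2)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = net(x)      # construct -> forward_features -> forward_head
print(logits.shape)  # expected: (1, 1000)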
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.efficientnet.EfficientNet.construct(x) + +

+ + +
+ +

construct

+ +
+ Source code in mindcv/models/efficientnet.py +
(lines 456-459)
def construct(self, x: Tensor) -> Tensor:
+    """construct"""
+    x = self.forward_features(x)
+    return self.forward_head(x)
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B0 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 497-514)
@register_model
+def efficientnet_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B0 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b0", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
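As a usage note for the factory above: num_classes and in_channels are simply forwarded to the EfficientNet constructor, so custom label spaces and non-RGB inputs can be configured directly. A minimal sketch with a hypothetical 1-channel, 10-class setup (pretrained=False, so no checkpoint is downloaded):

from mindcv.models.efficientnet import efficientnet_b0

# Hypothetical grayscale, 10-class configuration, for illustration only.
model = efficientnet_b0(pretrained=False, num_classes=10, in_channels=1)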
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B1 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 517-534)
@register_model
+def efficientnet_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B1 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b1", 1.0, 1.1, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B2 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 537-554)
@register_model
+def efficientnet_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B2 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b2", 1.1, 1.2, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B3 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 557-574)
@register_model
+def efficientnet_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B3 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b3", 1.2, 1.4, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B4 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 577-594)
@register_model
+def efficientnet_b4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B4 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b4", 1.4, 1.8, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B5 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 597-614)
@register_model
+def efficientnet_b5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B5 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b5", 1.6, 2.2, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b6(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B6 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 617-634)
@register_model
+def efficientnet_b6(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B6 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b6", 1.8, 2.6, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b7(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet B7 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 637-654)
@register_model
+def efficientnet_b7(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B7 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b7", 2.0, 3.1, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
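To summarize the B-series factories above, the (width_mult, depth_mult, dropout_rate) triples they forward to _efficientnet are collected below; the constant name is illustrative, while the values are taken verbatim from the calls shown in this section:

# (width_mult, depth_mult, dropout_rate) per variant, as passed to _efficientnet
# by efficientnet_b0 .. efficientnet_b7 above.
EFFICIENTNET_B_COEFFS = {
    "efficientnet_b0": (1.0, 1.0, 0.2),
    "efficientnet_b1": (1.0, 1.1, 0.2),
    "efficientnet_b2": (1.1, 1.2, 0.3),
    "efficientnet_b3": (1.2, 1.4, 0.3),
    "efficientnet_b4": (1.4, 1.8, 0.4),
    "efficientnet_b5": (1.6, 2.2, 0.4),
    "efficientnet_b6": (1.8, 2.6, 0.5),
    "efficientnet_b7": (2.0, 3.1, 0.5),
}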
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet V2 L architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 697-714)
@register_model
+def efficientnet_v2_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet V2 L architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_l", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet V2 M architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 677-694)
@register_model
+def efficientnet_v2_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet V2 M architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_m", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet V2 S architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 657-674)
@register_model
+def efficientnet_v2_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet V2 S architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_s", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_xl(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNet V2 XL architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py +
(lines 717-734)
@register_model
+def efficientnet_v2_xl(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet V2 XL architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_xl", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
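Unlike the B series, the V2 factories above all pass width_mult=1.0 and depth_mult=1.0; the variants differ only through the inverted_residual_setting that EfficientNet.__init__ selects from the arch prefix. A minimal sketch of building the small variant (assuming MindSpore and mindcv are installed; the input size is illustrative):

import numpy as np
import mindspore as ms
from mindcv.models.efficientnet import efficientnet_v2_s

net = efficientnet_v2_s(pretrained=False)
net.set_train(False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)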
+
+
+ +

features

+

ghostnet

+ + +
+ + + +

+ mindcv.models.ghostnet.GhostNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GhostNet model class, based on +"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
width +
+

base width of hidden channel in blocks. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

the dropout probability applied to the features before classification. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
+ +
+ Source code in mindcv/models/ghostnet.py +
(lines 177-295)
class GhostNet(nn.Cell):
+    r"""GhostNet model class, based on
+    `"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>`_.
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        width: base width of hidden channel in blocks. Default: 1.0.
+        in_channels: number of input channels. Default: 3.
+        drop_rate: the dropout probability applied to the features before classification. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        width: float = 1.0,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        # setting of inverted residual blocks
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        self.cfgs = [
+            # k, t, c, SE, s
+            # stage1
+            [[3, 16, 16, 0, 1]],
+            # stage2
+            [[3, 48, 24, 0, 2]],
+            [[3, 72, 24, 0, 1]],
+            # stage3
+            [[5, 72, 40, 0.25, 2]],
+            [[5, 120, 40, 0.25, 1]],
+            # stage4
+            [[3, 240, 80, 0, 2]],
+            [[3, 200, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 480, 112, 0.25, 1],
+             [3, 672, 112, 0.25, 1]
+             ],
+            # stage5
+            [[5, 672, 160, 0.25, 2]],
+            [[5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1],
+             [5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1]
+             ]
+        ]
+
+        # building first layer
+        stem_chs = make_divisible(16 * width, 4)
+        self.conv_stem = nn.Conv2d(in_channels, stem_chs, 3, 2, pad_mode="pad", padding=1, has_bias=False)
+        self.bn1 = nn.BatchNorm2d(stem_chs)
+        self.act1 = nn.ReLU()
+        prev_chs = stem_chs
+
+        # building inverted residual blocks
+        stages = []
+        for cfg in self.cfgs:
+            layers = []
+            for k, exp_size, c, se_ratio, s in cfg:
+                out_chs = make_divisible(c * width, 4)
+                mid_chs = make_divisible(exp_size * width, 4)
+                layers.append(GhostBottleneck(prev_chs, mid_chs, out_chs, k, s, se_ratio=se_ratio))
+                prev_chs = out_chs
+            stages.append(nn.SequentialCell(layers))
+
+        out_chs = make_divisible(exp_size * width, 4)
+        stages.append(ConvBnAct(prev_chs, out_chs, 1))
+        prev_chs = out_chs
+
+        self.blocks = nn.SequentialCell(stages)
+
+        # building last several layers
+        self.num_features = out_chs = 1280
+        self.global_pool = GlobalAvgPooling(keep_dims=True)
+        self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, pad_mode="pad", padding=0, has_bias=True)
+        self.act2 = nn.ReLU()
+        self.flatten = nn.Flatten()
+        if self.drop_rate > 0.0:
+            self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(out_chs, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.blocks(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.conv_head(x)
+        x = self.act2(x)
+        x = self.flatten(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
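A minimal usage sketch of the class above (assuming MindSpore and mindcv are installed; width=1.0 matches the ghostnet_100 factory below):

import numpy as np
import mindspore as ms
from mindcv.models.ghostnet import GhostNet

# width scales the hidden channels of every GhostBottleneck stage;
# drop_rate configures the Dropout applied before the classifier.
net = GhostNet(num_classes=1000, width=1.0, in_channels=3, drop_rate=0.2)
net.set_train(False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)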
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-0.5x

+ +
+ Source code in mindcv/models/ghostnet.py +
(lines 298-307)
@register_model
+def ghostnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-0.5x """
+    default_cfg = default_cfgs["ghostnet_050"]
+    model = GhostNet(width=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.0x

+ +
+ Source code in mindcv/models/ghostnet.py +
(lines 310-319)
@register_model
+def ghostnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.0x """
+    default_cfg = default_cfgs["ghostnet_100"]
+    model = GhostNet(width=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.3x

+ +
+ Source code in mindcv/models/ghostnet.py +
(lines 322-331)
@register_model
+def ghostnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.3x """
+    default_cfg = default_cfgs["ghostnet_130"]
+    model = GhostNet(width=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

hrnet

+ + +
+ + + +

+ mindcv.models.hrnet.HRNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

HRNet Backbone, based on +"Deep High-Resolution Representation Learning for Visual Recognition" +<https://arxiv.org/abs/1908.07919>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
stage_cfg +
+

Configuration of the extra blocks. It accepts a dictionary +storing the detailed config of each block, which includes num_modules, +num_branches, block, num_blocks, num_channels. For a detailed example, +please check the implementation of hrnet_w32 and hrnet_w48.

+
+

+ + TYPE: + Dict[str, Dict[str, int]] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/hrnet.py +
(lines 357-681)
class HRNet(nn.Cell):
+    r"""HRNet Backbone, based on
+    `"Deep High-Resolution Representation Learning for Visual Recognition"
+    <https://arxiv.org/abs/1908.07919>`_.
+
+    Args:
+        stage_cfg: Configuration of the extra blocks. It accepts a dictionary
+            storing the detailed config of each block, which includes `num_modules`,
+            `num_branches`, `block`, `num_blocks`, `num_channels`. For a detailed example,
+            please check the implementation of `hrnet_w32` and `hrnet_w48`.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: Number of channels of the input. Default: 3.
+    """
+
+    blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
+
+    def __init__(
+        self,
+        stage_cfg: Dict[str, Dict[str, int]],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+
+        self.stage_cfg = stage_cfg
+        # stem net
+        self.conv1 = nn.Conv2d(
+            in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn1 = nn.BatchNorm2d(64)
+        self.conv2 = nn.Conv2d(
+            64, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn2 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU()
+
+        # stage 1
+        self.stage1_cfg = self.stage_cfg["stage1"]
+        num_channels = self.stage1_cfg["num_channels"][0]
+        num_blocks = self.stage1_cfg["num_blocks"][0]
+        block = self.blocks_dict[self.stage1_cfg["block"]]
+        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
+
+        # stage 2
+        self.stage2_cfg = self.stage_cfg["stage2"]
+        num_channels = self.stage2_cfg["num_channels"]
+        block = self.blocks_dict[self.stage2_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition1, self.transition1_flags = self._make_transition_layer(
+            [256], num_channels
+        )
+        self.stage2, pre_stage_channels = self._make_stage(
+            self.stage2_cfg, num_channels
+        )
+
+        # stage 3
+        self.stage3_cfg = self.stage_cfg["stage3"]
+        num_channels = self.stage3_cfg["num_channels"]
+        block = self.blocks_dict[self.stage3_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition2, self.transition2_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage3, pre_stage_channels = self._make_stage(
+            self.stage3_cfg, num_channels
+        )
+
+        # stage 4
+        self.stage4_cfg = self.stage_cfg["stage4"]
+        num_channels = self.stage4_cfg["num_channels"]
+        block = self.blocks_dict[self.stage4_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+        self.transition3, self.transition3_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage4, pre_stage_channels = self._make_stage(
+            self.stage4_cfg, num_channels
+        )
+
+        # head
+        self.pool = GlobalAvgPooling()
+        self.incre_modules, self.downsample_modules, self.final_layer = self._make_head(
+            pre_stage_channels
+        )
+        self.classifier = nn.Dense(2048, num_classes)
+
+    def _make_head(self, pre_stage_channels: List[int]):
+        head_block = Bottleneck
+        head_channels = [32, 64, 128, 256]
+
+        # increase the #channels on each resolution
+        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
+        incre_modules = list()
+        for i, channels in enumerate(pre_stage_channels):
+            incre_module = self._make_layer(
+                head_block, channels, head_channels[i], 1, stride=1
+            )
+            incre_modules.append(incre_module)
+        incre_modules = nn.CellList(incre_modules)
+
+        # downsample modules
+        downsamp_modules = []
+        for i in range(len(pre_stage_channels) - 1):
+            in_channels = head_channels[i] * head_block.expansion
+            out_channels = head_channels[i + 1] * head_block.expansion
+
+            downsamp_module = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=3,
+                    stride=2,
+                    pad_mode="pad",
+                    padding=1,
+                ),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(),
+            )
+
+            downsamp_modules.append(downsamp_module)
+        downsamp_modules = nn.CellList(downsamp_modules)
+
+        final_layer = nn.SequentialCell(
+            nn.Conv2d(
+                in_channels=head_channels[3] * head_block.expansion,
+                out_channels=2048,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            ),
+            nn.BatchNorm2d(2048),
+            nn.ReLU(),
+        )
+
+        return incre_modules, downsamp_modules, final_layer
+
+    def _make_transition_layer(
+        self, num_channels_pre_layer: List[int], num_channels_cur_layer: List[int]
+    ) -> Tuple[nn.CellList, List[bool]]:
+        num_branches_cur = len(num_channels_cur_layer)
+        num_branches_pre = len(num_channels_pre_layer)
+
+        transition_layers = []
+        transition_layers_flags = []
+        for i in range(num_branches_cur):
+            if i < num_branches_pre:
+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+                    transition_layers.append(
+                        nn.SequentialCell(
+                            nn.Conv2d(
+                                num_channels_pre_layer[i],
+                                num_channels_cur_layer[i],
+                                kernel_size=3,
+                                padding=1,
+                                pad_mode="pad",
+                            ),
+                            nn.BatchNorm2d(num_channels_cur_layer[i]),
+                            nn.ReLU(),
+                        )
+                    )
+                    transition_layers_flags.append(True)
+                else:
+                    transition_layers.append(IdentityCell())
+                    transition_layers_flags.append(False)
+            else:
+                conv3x3s = []
+                for j in range(i + 1 - num_branches_pre):
+                    inchannels = num_channels_pre_layer[-1]
+                    outchannels = (
+                        num_channels_cur_layer[i]
+                        if j == i - num_branches_pre
+                        else inchannels
+                    )
+                    conv3x3s.append(
+                        nn.SequentialCell(
+                            [
+                                nn.Conv2d(
+                                    inchannels,
+                                    outchannels,
+                                    kernel_size=3,
+                                    stride=2,
+                                    padding=1,
+                                    pad_mode="pad",
+                                ),
+                                nn.BatchNorm2d(outchannels),
+                                nn.ReLU(),
+                            ]
+                        )
+                    )
+                transition_layers.append(nn.SequentialCell(conv3x3s))
+                transition_layers_flags.append(True)
+
+        return nn.CellList(transition_layers), transition_layers_flags
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        in_channels: int,
+        out_channels: int,
+        blocks: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or in_channels != out_channels * block.expansion:
+            downsample = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                ),
+                nn.BatchNorm2d(out_channels * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(in_channels, out_channels, stride, down_sample=downsample))
+        for _ in range(1, blocks):
+            layers.append(block(out_channels * block.expansion, out_channels))
+
+        return nn.SequentialCell(layers)
+
+    def _make_stage(
+        self,
+        layer_config: Dict[str, int],
+        num_inchannels: int,
+        multi_scale_output: bool = True,
+    ) -> Tuple[nn.SequentialCell, List[int]]:
+        num_modules = layer_config["num_modules"]
+        num_branches = layer_config["num_branches"]
+        num_blocks = layer_config["num_blocks"]
+        num_channels = layer_config["num_channels"]
+        block = self.blocks_dict[layer_config["block"]]
+
+        modules = []
+        for i in range(num_modules):
+            # multi_scale_output is only used by the last module
+            if not multi_scale_output and i == num_modules - 1:
+                reset_multi_scale_output = False
+            else:
+                reset_multi_scale_output = True
+
+            modules.append(
+                HRModule(
+                    num_branches,
+                    block,
+                    num_blocks,
+                    num_inchannels,
+                    num_channels,
+                    reset_multi_scale_output,
+                )
+            )
+            num_inchannels = modules[-1].num_inchannels
+
+        return nn.SequentialCell(modules), num_inchannels
+
+    def forward_features(self, x: Tensor) -> List[Tensor]:
+        """Perform the feature extraction.
+
+        Args:
+            x: Tensor
+
+        Returns:
+            Extracted feature
+        """
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
+
+        # stage 1
+        x = self.layer1(x)
+
+        # stage 2
+        x_list = []
+        for i in range(self.stage2_cfg["num_branches"]):
+            if self.transition1_flags[i]:
+                x_list.append(self.transition1[i](x))
+            else:
+                x_list.append(x)
+        y_list = self.stage2(x_list)
+
+        # stage 3
+        x_list = []
+        for i in range(self.stage3_cfg["num_branches"]):
+            if self.transition2_flags[i]:
+                x_list.append(self.transition2[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y_list = self.stage3(x_list)
+
+        # stage 4
+        x_list = []
+        for i in range(self.stage4_cfg["num_branches"]):
+            if self.transition3_flags[i]:
+                x_list.append(self.transition3[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y = self.stage4(x_list)
+
+        return y
+
+    def forward_head(self, x: List[Tensor]) -> Tensor:
+        y = self.incre_modules[0](x[0])
+        for i in range(len(self.downsample_modules)):
+            y = self.incre_modules[i + 1](x[i + 1]) + self.downsample_modules[i](y)
+
+        y = self.final_layer(y)
+        y = self.pool(y)
+        y = self.classifier(y)
+        return y
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.hrnet.HRNet.forward_features(x) + +

+ + +
+ +

Perform the feature extraction.

+ + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
x +
+

Tensor

+
+

+ + TYPE: + Tensor + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + List[Tensor] + + +
+

Extracted feature

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
(lines 620-666)
def forward_features(self, x: Tensor) -> List[Tensor]:
+    """Perform the feature extraction.
+
+    Args:
+        x: Tensor
+
+    Returns:
+        Extracted feature
+    """
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.conv2(x)
+    x = self.bn2(x)
+    x = self.relu(x)
+
+    # stage 1
+    x = self.layer1(x)
+
+    # stage 2
+    x_list = []
+    for i in range(self.stage2_cfg["num_branches"]):
+        if self.transition1_flags[i]:
+            x_list.append(self.transition1[i](x))
+        else:
+            x_list.append(x)
+    y_list = self.stage2(x_list)
+
+    # stage 3
+    x_list = []
+    for i in range(self.stage3_cfg["num_branches"]):
+        if self.transition2_flags[i]:
+            x_list.append(self.transition2[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y_list = self.stage3(x_list)
+
+    # stage 4
+    x_list = []
+    for i in range(self.stage4_cfg["num_branches"]):
+        if self.transition3_flags[i]:
+            x_list.append(self.transition3[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y = self.stage4(x_list)
+
+    return y
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.hrnet.hrnet_w32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get HRNet with width=32 model. +Refer to the base class models.HRNet for more details.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether the model is pretrained. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of input channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Union[HRNet, HRNetFeatures] + + +
+

HRNet model

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
(lines 756-803)
@register_model
+def hrnet_w32(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=32 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w32"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[32, 64],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[32, 64, 128],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[32, 64, 128, 256],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
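A minimal usage sketch for the registered HRNet factories, assuming a random 224x224 input; pretrained=True would additionally download the checkpoint referenced by default_cfgs:

import numpy as np
import mindspore as ms
from mindcv.models.hrnet import hrnet_w32, hrnet_w48

model = hrnet_w32(pretrained=False, num_classes=1000)
# hrnet_w48 is built the same way; only the stage channel widths differ (32/64/... vs 48/96/...)
logits = model(ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32))
print(logits.shape)   # (1, 1000)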
+
+
+

mindcv.models.hrnet.hrnet_w48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get HRNet with width=48 model.
Refer to the base class models.HRNet for more details.

PARAMETER     DESCRIPTION                         TYPE    DEFAULT
pretrained    Whether the model is pretrained.    bool    False
num_classes   Number of classification classes.   int     1000
in_channels   Number of input channels.           int     3

RETURNS                        DESCRIPTION
Union[HRNet, HRNetFeatures]    HRNet model

Source code in mindcv/models/hrnet.py
@register_model
+def hrnet_w48(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=48 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w48"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[48, 96],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[48, 96, 192],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[48, 96, 192, 384],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +

inceptionv3

mindcv.models.inceptionv3.InceptionV3

Bases: nn.Cell

Inception v3 model architecture from
"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>.

Note (important): in contrast to the other models, inception_v3 expects tensors with a size of
N x 3 x 299 x 299, so ensure your images are sized accordingly.

PARAMETER     DESCRIPTION                                             TYPE     DEFAULT
num_classes   Number of classification classes.                       int      1000
aux_logits    Use the auxiliary classifier or not.                    bool     True
in_channels   Number of channels of the input.                        int      3
drop_rate     Dropout rate of the layer before the main classifier.   float    0.2

Source code in mindcv/models/inceptionv3.py
class InceptionV3(nn.Cell):
+    r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>`_.
+
+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        aux_logits: use auxiliary classifier or not. Default: True.
+        in_channels: number the channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        aux_logits: bool = True,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        self.aux_logits = aux_logits
+        self.conv1a = BasicConv2d(in_channels, 32, kernel_size=3, stride=2, pad_mode="valid")
+        self.conv2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode="valid")
+        self.conv2b = BasicConv2d(32, 64, kernel_size=3, stride=1)
+        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.conv3b = BasicConv2d(64, 80, kernel_size=1)
+        self.conv4a = BasicConv2d(80, 192, kernel_size=3, pad_mode="valid")
+        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.inception5b = InceptionA(192, pool_features=32)
+        self.inception5c = InceptionA(256, pool_features=64)
+        self.inception5d = InceptionA(288, pool_features=64)
+        self.inception6a = InceptionB(288)
+        self.inception6b = InceptionC(768, channels_7x7=128)
+        self.inception6c = InceptionC(768, channels_7x7=160)
+        self.inception6d = InceptionC(768, channels_7x7=160)
+        self.inception6e = InceptionC(768, channels_7x7=192)
+        if self.aux_logits:
+            self.aux = InceptionAux(768, num_classes)
+        self.inception7a = InceptionD(768)
+        self.inception7b = InceptionE(1280)
+        self.inception7c = InceptionE(2048)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 2048
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_preaux(self, x: Tensor) -> Tensor:
+        x = self.conv1a(x)
+        x = self.conv2a(x)
+        x = self.conv2b(x)
+        x = self.maxpool1(x)
+        x = self.conv3b(x)
+        x = self.conv4a(x)
+        x = self.maxpool2(x)
+        x = self.inception5b(x)
+        x = self.inception5c(x)
+        x = self.inception5d(x)
+        x = self.inception6a(x)
+        x = self.inception6b(x)
+        x = self.inception6c(x)
+        x = self.inception6d(x)
+        x = self.inception6e(x)
+        return x
+
+    def forward_postaux(self, x: Tensor) -> Tensor:
+        x = self.inception7a(x)
+        x = self.inception7b(x)
+        x = self.inception7c(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.forward_preaux(x)
+        x = self.forward_postaux(x)
+        return x
+
+    def construct(self, x: Tensor) -> Union[Tensor, Tuple[Tensor, Tensor]]:
+        x = self.forward_preaux(x)
+        if self.training and self.aux_logits:
+            aux = self.aux(x)
+        else:
+            aux = None
+        x = self.forward_postaux(x)
+
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+
+        if self.training and self.aux_logits:
+            return x, aux
+        return x
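A short sketch of the construct() behaviour above: with aux_logits=True the network returns a (logits, aux) pair only while set_train(True) is active, and a single logits tensor otherwise. It assumes the 1 x 3 x 299 x 299 input size required by the note above.

import numpy as np
import mindspore as ms
from mindcv.models.inceptionv3 import InceptionV3

net = InceptionV3(num_classes=1000, aux_logits=True)
x = ms.Tensor(np.random.randn(1, 3, 299, 299), ms.float32)

net.set_train(True)
logits, aux = net(x)     # main head and auxiliary head during training

net.set_train(False)
out = net(x)             # only the main head at inference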
+
+
mindcv.models.inceptionv3.inception_v3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get InceptionV3 model.
Refer to the base class models.InceptionV3 for more details.

Source code in mindcv/models/inceptionv3.py
@register_model
+def inception_v3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV3:
+    """Get InceptionV3 model.
+    Refer to the base class `models.InceptionV3` for more details."""
+    default_cfg = default_cfgs["inception_v3"]
+    model = InceptionV3(num_classes=num_classes, aux_logits=True, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
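Because the factory is decorated with @register_model, it can also be reached by name through the model registry; a hedged sketch, assuming mindcv's top-level create_model helper is available:

import mindcv
from mindcv.models.inceptionv3 import inception_v3

# direct call to the registered factory
net_a = inception_v3(pretrained=False, num_classes=10)

# equivalent lookup by name through the registry (assumption: mindcv.create_model)
net_b = mindcv.create_model("inception_v3", pretrained=False, num_classes=10)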
+
+
+
+ +

inceptionv4

mindcv.models.inceptionv4.InceptionV4

Bases: nn.Cell

Inception v4 model architecture from
"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>.

PARAMETER     DESCRIPTION                                             TYPE     DEFAULT
num_classes   Number of classification classes.                       int      1000
in_channels   Number of channels of the input.                        int      3
drop_rate     Dropout rate of the layer before the main classifier.   float    0.2

Source code in mindcv/models/inceptionv4.py
class InceptionV4(nn.Cell):
+    r"""Inception v4 model architecture from
+    `"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>`_.  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number the channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        blocks = [Stem(in_channels)]
+        for _ in range(4):
+            blocks.append(InceptionA())
+        blocks.append(ReductionA())
+        for _ in range(7):
+            blocks.append(InceptionB())
+        blocks.append(ReductionB())
+        for _ in range(3):
+            blocks.append(InceptionC())
+        self.features = nn.SequentialCell(blocks)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 1536
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
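A small sketch of the forward_features / forward_head split above, which makes the 1536-channel feature map reusable before pooling (assuming a 299x299 input, as for the other Inception models):

import numpy as np
import mindspore as ms
from mindcv.models.inceptionv4 import InceptionV4

net = InceptionV4(num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 299, 299), ms.float32)

feat_map = net.forward_features(x)   # stem + Inception/Reduction blocks, 1536 channels
logits = net.forward_head(feat_map)  # global average pool -> dropout -> Dense
print(feat_map.shape, logits.shape)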
+
+
mindcv.models.inceptionv4.inception_v4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get InceptionV4 model.
Refer to the base class models.InceptionV4 for more details.

Source code in mindcv/models/inceptionv4.py
@register_model
+def inception_v4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV4:
+    """Get InceptionV4 model.
+    Refer to the base class `models.InceptionV4` for more details."""
+    default_cfg = default_cfgs["inception_v4"]
+    model = InceptionV4(num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

mixnet

mindcv.models.mixnet.MixNet

Bases: nn.Cell

MixNet model class, based on
"MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>.

PARAMETER          DESCRIPTION                                               TYPE     DEFAULT
arch               Size of the architecture: "small", "medium" or "large".   str      'small'
num_classes        Number of classification classes.                         int      1000
in_channels        Number of channels of the input.                          int      3
feature_size       Number of channels of the output features.                int      1536
drop_rate          Dropout rate for the classifier.                          float    0.2
depth_multiplier   Expansion coefficient of channels.                        float    1.0

Source code in mindcv/models/mixnet.py
class MixNet(nn.Cell):
+    r"""MixNet model class, based on
+    `"MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>`_
+
+    Args:
+        arch: size of the architecture. "small", "medium" or "large". Default: "small".
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of the channels of the input. Default: 3.
+        feature_size: number of the channels of the output features. Default: 1536.
+        drop_rate: rate of dropout for classifier. Default: 0.2.
+        depth_multiplier: expansion coefficient of channels. Default: 1.0.
+    """
+
+    def __init__(
+        self,
+        arch: str = "small",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        feature_size: int = 1536,
+        drop_rate: float = 0.2,
+        depth_multiplier: float = 1.0
+    ) -> None:
+        super(MixNet, self).__init__()
+        if arch == "small":
+            block_configs = [
+                [16, 16, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [16, 24, [3], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [24, 24, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [24, 40, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1, 1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3, 5, 7], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9, 11], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            stem_channels = 16
+            drop_rate = drop_rate
+        else:
+            block_configs = [
+                [24, 24, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [24, 32, [3, 5, 7], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [32, 32, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [32, 40, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3], [1], [1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            if arch == "medium":
+                stem_channels = 24
+                drop_rate = drop_rate
+            elif arch == "large":
+                stem_channels = 24
+                depth_multiplier *= 1.3
+                drop_rate = drop_rate
+            else:
+                raise ValueError(f"Unsupported model type {arch}")
+
+        if depth_multiplier != 1.0:
+            stem_channels = _roundchannels(stem_channels * depth_multiplier)
+
+            for i, conf in enumerate(block_configs):
+                conf_ls = list(conf)
+                conf_ls[0] = _roundchannels(conf_ls[0] * depth_multiplier)
+                conf_ls[1] = _roundchannels(conf_ls[1] * depth_multiplier)
+                block_configs[i] = tuple(conf_ls)
+
+        # stem convolution
+        self.stem_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, stem_channels, 3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(stem_channels),
+            nn.ReLU()
+        ])
+
+        # building MixNet blocks
+        layers = []
+        for inc, outc, k, ek, pk, s, er, ac, se in block_configs:
+            layers.append(MixNetBlock(
+                inc,
+                outc,
+                kernel_size=k,
+                expand_ksize=ek,
+                project_ksize=pk,
+                stride=s,
+                expand_ratio=er,
+                activation=ac,
+                se_ratio=se
+            ))
+        self.layers = nn.SequentialCell(layers)
+
+        # head
+        self.head_conv = nn.SequentialCell([
+            nn.Conv2d(block_configs[-1][1], feature_size, 1, pad_mode="pad", padding=0),
+            nn.BatchNorm2d(feature_size),
+            nn.ReLU()
+        ])
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(feature_size, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(math.sqrt(2.0 / fan_out)),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Uniform(1.0 / math.sqrt(cell.weight.shape[0])),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem_conv(x)
+        x = self.layers(x)
+        x = self.head_conv(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
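The constructor above selects one of two block tables by arch and, for "large", additionally scales the channel widths by 1.3 before rounding. A minimal sketch of building the three sizes directly (the registered mixnet_s/m/l factories below do the same):

from mindcv.models.mixnet import MixNet

small = MixNet(arch="small")    # 16-channel stem, small block table
medium = MixNet(arch="medium")  # 24-channel stem, large block table
large = MixNet(arch="large")    # 24-channel stem, widths scaled by 1.3 and re-rounded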
+
+
mindcv.models.mixnet.mixnet_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mixnet.py
@register_model
+def mixnet_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_l"]
+    model = MixNet(arch="large", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mixnet.mixnet_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mixnet.py +
398
+399
+400
+401
+402
+403
+404
+405
+406
@register_model
+def mixnet_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_m"]
+    model = MixNet(arch="medium", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mixnet.mixnet_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mixnet.py +
387
+388
+389
+390
+391
+392
+393
+394
+395
@register_model
+def mixnet_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_s"]
+    model = MixNet(arch="small", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

mlpmixer

mindcv.models.mlpmixer.MLPMixer

Bases: nn.Cell

MLP-Mixer model class, based on
"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>.

PARAMETER     DESCRIPTION                                        TYPE           DEFAULT
depth         Number of MixerBlocks.                             int            required
patch_size    Size of a single image patch.                      int or tuple   required
n_patches     Number of patches.                                 int            required
n_channels    Channels (dimension) of a single embedded patch.   int            required
token_dim     Hidden dim of the token-mixing MLP.                int            required
channel_dim   Hidden dim of the channel-mixing MLP.              int            required
num_classes   Number of classification classes.                  int            1000
in_channels   Number of channels of the input.                                  3

Source code in mindcv/models/mlpmixer.py
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (int or tuple) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        num_classes (int) : number of classification classes.
+        in_channels: number the channels of the input. Default: 3.
+    """
+
+    def __init__(self, depth, patch_size, n_patches, n_channels, token_dim, channel_dim, num_classes=1000,
+                 in_channels=3):
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, num_classes)
+        self.mean = ops.ReduceMean()
+        self._initialize_weights()
+
+    def construct(self, x):
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        x = self.mean(x, 1)
+        return self.mlp_head(x)
+
+    def _initialize_weights(self):
+        # todo: implement weights init
+        pass
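A minimal sketch of the constructor arguments, matching the mlp_mixer_s_p16 configuration used by the factories below (assuming 224x224 inputs, so a 16x16 patch size gives a 14 x 14 = 196 patch grid):

from mindcv.models.mlpmixer import MLPMixer

# depth, patch_size, n_patches, n_channels, token_dim, channel_dim as in mlp_mixer_s_p16
net = MLPMixer(depth=8, patch_size=16, n_patches=196, n_channels=512,
               token_dim=256, channel_dim=2048, num_classes=1000, in_channels=3)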
+
+
mindcv.models.mlpmixer.mlp_mixer_b_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mlpmixer.py
@register_model
+def mlp_mixer_b_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 16, 196, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_b_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_h_p14(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
@register_model
+def mlp_mixer_h_p14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 32, 14, 256, 1280, 640, 5120
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_h_p14"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_l_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
@register_model
+def mlp_mixer_l_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 16, 196, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_l_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
@register_model
+def mlp_mixer_l_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 32, 49, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_s_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_s_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
@register_model
+def mlp_mixer_s_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    # number_of_layers, patch_resolution, length_of_sequence, hidden_size, mpl_dim_sequence, mpl_dim_channel
+    nl, pr, ls, hs, ds, dc = 8, 32, 49, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs,
+                     token_dim=ds, channel_dim=dc, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
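The in-code comment above decodes the configuration tuple as (number_of_layers, patch_resolution, length_of_sequence, hidden_size, token-MLP dim, channel-MLP dim). A quick arithmetic check of the patch settings used by the factories, assuming 224x224 inputs and a square patch grid (a plausible reading of what _check_resolution_and_length_of_patch verifies):

# (patch_resolution, length_of_sequence) pairs taken from the factories above
for pr, ls in [(32, 49), (16, 196), (14, 256)]:
    side = int(round(ls ** 0.5))
    assert side * side == ls          # the patch grid is square
    assert pr * side == 224           # and tiles a 224x224 image exactly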
+
+
+
+ +

mnasnet

mindcv.models.mnasnet.Mnasnet

Bases: nn.Cell

MnasNet model architecture from
"MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>.

PARAMETER     DESCRIPTION                                             TYPE     DEFAULT
alpha         Scale factor of model width.                            float    required
in_channels   Number of channels of the input.                        int      3
num_classes   Number of classification classes.                       int      1000
drop_rate     Dropout rate of the layer before the main classifier.   float    0.2

Source code in mindcv/models/mnasnet.py
class Mnasnet(nn.Cell):
+    r"""MnasNet model architecture from
+    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>`_.
+
+    Args:
+        alpha: scale factor of model width.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        alpha: float,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        drop_rate: float = 0.2,
+    ):
+        super().__init__()
+
+        inverted_residual_setting = [
+            # t, c, n, s, k
+            [3, 24, 3, 2, 3],  # -> 56x56
+            [3, 40, 3, 2, 5],  # -> 28x28
+            [6, 80, 3, 2, 5],  # -> 14x14
+            [6, 96, 2, 1, 3],  # -> 14x14
+            [6, 192, 4, 2, 5],  # -> 7x7
+            [6, 320, 1, 1, 3],  # -> 7x7
+        ]
+
+        mid_channels = make_divisible(32 * alpha, 8)
+        input_channels = make_divisible(16 * alpha, 8)
+
+        features: List[nn.Cell] = [
+            nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, pad_mode="pad", padding=1,
+                      group=mid_channels),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, input_channels, kernel_size=1, stride=1),
+            nn.BatchNorm2d(input_channels, momentum=0.99, eps=1e-3),
+        ]
+
+        for t, c, n, s, k in inverted_residual_setting:
+            output_channels = make_divisible(c * alpha, 8)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channels,
+                                                 stride=stride, kernel_size=k, expand_ratio=t))
+                input_channels = output_channels
+
+        features.extend([
+            nn.Conv2d(input_channels, 1280, kernel_size=1, stride=1),
+            nn.BatchNorm2d(1280, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+        ])
+        self.features = nn.SequentialCell(features)
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(1280, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_out", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
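A sketch of the width scaling used above: every stage width is multiplied by alpha and rounded to a multiple of 8, which is why the registered mnasnet_050 through mnasnet_140 factories below differ only in that single argument. The rounding helper is re-implemented here purely for illustration (assumption: it follows the usual make-divisible rule; the module's real make_divisible is imported elsewhere in mindcv):

def make_divisible_sketch(v: float, divisor: int = 8) -> int:
    # hypothetical re-implementation of the common rounding rule, for illustration only
    new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:          # never round down by more than 10%
        new_v += divisor
    return new_v

alpha = 0.5  # as in mnasnet_050
print([make_divisible_sketch(c * alpha) for c in (32, 16, 24, 40, 80, 96, 192, 320)])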
+
+
mindcv.models.mnasnet.mnasnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MnasNet model with width scaled by 0.5.
Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.5.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_050"]
+    model = Mnasnet(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mnasnet.mnasnet_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MnasNet model with width scaled by 0.75. +Refer to the base class models.Mnasnet for more details.

+ +
+ Source code in mindcv/models/mnasnet.py +
193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
@register_model
+def mnasnet_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.75.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_075"]
+    model = Mnasnet(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mnasnet.mnasnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MnasNet model with width scaled by 1.0. +Refer to the base class models.Mnasnet for more details.

+ +
+ Source code in mindcv/models/mnasnet.py +
206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
@register_model
+def mnasnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.0.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_100"]
+    model = Mnasnet(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mnasnet.mnasnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MnasNet model with width scaled by 1.3. +Refer to the base class models.Mnasnet for more details.

+ +
+ Source code in mindcv/models/mnasnet.py +
219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
@register_model
+def mnasnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.3.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_130"]
+    model = Mnasnet(alpha=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mnasnet.mnasnet_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MnasNet model with width scaled by 1.4. +Refer to the base class models.Mnasnet for more details.

+ +
+ Source code in mindcv/models/mnasnet.py +
232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
@register_model
+def mnasnet_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.4.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_140"]
+    model = Mnasnet(alpha=1.4, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

mobilenetv1

mindcv.models.mobilenetv1.MobileNetV1

Bases: nn.Cell

MobileNetV1 model class, based on
"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>.

PARAMETER     DESCRIPTION                          TYPE     DEFAULT
alpha         Scale factor of model width.         float    1.0
in_channels   Number of channels of the input.     int      3
num_classes   Number of classification classes.    int      1000

Source code in mindcv/models/mobilenetv1.py
class MobileNetV1(nn.Cell):
+    r"""MobileNetV1 model class, based on
+    `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_  # noqa: E501
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = int(32 * alpha)
+        # Setting of depth-wise separable conv
+        # c: number of output channel
+        # s: stride of depth-wise conv
+        block_setting = [
+            # c, s
+            [64, 1],
+            [128, 2],
+            [128, 1],
+            [256, 2],
+            [256, 1],
+            [512, 2],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [1024, 2],
+            [1024, 1],
+        ]
+
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU(),
+        ]
+        for c, s in block_setting:
+            output_channel = int(c * alpha)
+            features.append(depthwise_separable_conv(input_channels, output_channel, s))
+            input_channels = output_channel
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(input_channels, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
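A small sketch of the effect of the width multiplier above: every channel count in the block setting is scaled by alpha, so the classifier input becomes int(1024 * alpha) (assuming the default 13-block configuration shown in the constructor):

from mindcv.models.mobilenetv1 import MobileNetV1

net = MobileNetV1(alpha=0.5)
# nn.Dense stores its weight as (out_channels, in_channels)
print(net.classifier.weight.shape)   # expected (1000, 512), since int(1024 * 0.5) = 512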
+
+
mindcv.models.mobilenetv1.mobilenet_v1_025(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV1 model with width scaled by 0.25.
Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py
@register_model
+def mobilenet_v1_025(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.25.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_025"]
+    model = MobileNetV1(alpha=0.25, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv1.mobilenet_v1_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV1 model with width scaled by 0.5. +Refer to the base class models.MobileNetV1 for more details.

+ +
+ Source code in mindcv/models/mobilenetv1.py +
151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
@register_model
+def mobilenet_v1_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.5.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_050"]
+    model = MobileNetV1(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv1.mobilenet_v1_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV1 model with width scaled by 0.75. +Refer to the base class models.MobileNetV1 for more details.

+ +
+ Source code in mindcv/models/mobilenetv1.py +
165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
@register_model
+def mobilenet_v1_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_075"]
+    model = MobileNetV1(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv1.mobilenet_v1_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV1 model without width scaling. +Refer to the base class models.MobileNetV1 for more details.

+ +
+ Source code in mindcv/models/mobilenetv1.py +
179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
@register_model
+def mobilenet_v1_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model without width scaling.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_100"]
+    model = MobileNetV1(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

mobilenetv2

mindcv.models.mobilenetv2.MobileNetV2

Bases: nn.Cell

MobileNetV2 model class, based on
"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>.

PARAMETER       DESCRIPTION                                 TYPE     DEFAULT
alpha           Scale factor of model width.                float    1.0
round_nearest   Divisor of the make-divisible function.     int      8
in_channels     Number of channels of the input.            int      3
num_classes     Number of classification classes.           int      1000

Source code in mindcv/models/mobilenetv2.py
class MobileNetV2(nn.Cell):
+    r"""MobileNetV2 model class, based on
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(32 * alpha, round_nearest)
+        # Setting of inverted residual blocks.
+        # t: The expansion factor.
+        # c: Number of output channel.
+        # n: Number of block.
+        # s: First block stride.
+        inverted_residual_setting = [
+            # t, c, n, s
+            [1, 16, 1, 1],
+            [6, 24, 2, 2],
+            [6, 32, 3, 2],
+            [6, 64, 4, 2],
+            [6, 96, 3, 1],
+            [6, 160, 3, 2],
+            [6, 320, 1, 1],
+        ]
+        last_channels = make_divisible(1280 * max(1.0, alpha), round_nearest)
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU6(),
+        ]
+        # Building inverted residual blocks.
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = make_divisible(c * alpha, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channel, stride, expand_ratio=t))
+                input_channels = output_channel
+        # Building last point-wise layers.
+        features.extend([
+            nn.Conv2d(input_channels, last_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(last_channels),
+            nn.ReLU6(),
+        ])
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            Dropout(p=0.2),  # confirmed by paper authors
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
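A sketch of the channel bookkeeping above: the stem has make_divisible(32 * alpha, round_nearest) channels, while the final point-wise layer uses max(1.0, alpha), so narrow models still feed a 1280-dimensional feature to the classifier:

from mindcv.models.mobilenetv2 import MobileNetV2

net = MobileNetV2(alpha=0.75)
# classifier = SequentialCell([Dropout, Dense(last_channels, num_classes)])
dense = net.classifier[1]
print(dense.weight.shape)   # expected (1000, 1280), because max(1.0, 0.75) = 1.0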
+
+
mindcv.models.mobilenetv2.mobilenet_v2_035_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_128"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
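Every registered MobileNetV2 variant on this page follows the pattern of this function: the name encodes the width multiplier (035 -> alpha=0.35, 050 -> 0.5, 075 -> 0.75, 100 -> 1.0, 130 -> 1.3, 140 -> 1.4) and, where present, the intended input image size (96/128/160/192/224). A hedged usage sketch (whether a checkpoint exists for a given variant depends on its default_cfgs entry and is not guaranteed here):

from mindcv.models.mobilenetv2 import mobilenet_v2_035_128, mobilenet_v2_140

# Randomly initialized, with a 10-way head instead of the default 1000 classes.
tiny = mobilenet_v2_035_128(pretrained=False, num_classes=10)

# pretrained=True would call load_pretrained() with the matching default_cfgs
# entry, assuming a checkpoint URL is configured for this variant.
wide = mobilenet_v2_140(pretrained=False)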
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_035_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.35 and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 528-539:
@register_model
+def mobilenet_v2_035_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_160"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_035_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.35 and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 514-525:
@register_model
+def mobilenet_v2_035_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_192"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_035_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.35 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 500-511:
@register_model
+def mobilenet_v2_035_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_224"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_035_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.35 and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 556-567:
@register_model
+def mobilenet_v2_035_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_96"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 472-483:
@register_model
+def mobilenet_v2_050_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_128"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 458-469:
@register_model
+def mobilenet_v2_050_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_160"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 444-455:
@register_model
+def mobilenet_v2_050_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_192"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 430-441:
@register_model
+def mobilenet_v2_050_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_224"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_050_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.5 and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 486-497:
@register_model
+def mobilenet_v2_050_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_96"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 360-371:
@register_model
+def mobilenet_v2_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 402-413:
@register_model
+def mobilenet_v2_075_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_128"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 388-399:
@register_model
+def mobilenet_v2_075_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_160"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 374-385:
@register_model
+def mobilenet_v2_075_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_192"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_075_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 0.75 and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 416-427:
@register_model
+def mobilenet_v2_075_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_96"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 290-301:
@register_model
+def mobilenet_v2_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 128. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 332-343:
@register_model
+def mobilenet_v2_100_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_128"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 160. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 318-329:
@register_model
+def mobilenet_v2_100_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_160"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 192. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 304-315:
@register_model
+def mobilenet_v2_100_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_192"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_100_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model without width scaling and input image size of 96. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 346-357:
@register_model
+def mobilenet_v2_100_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_96"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_130_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 1.3 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 276-287:
@register_model
+def mobilenet_v2_130_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.3 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_130_224"]
+    model = MobileNetV2(alpha=1.3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv2.mobilenet_v2_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get MobileNetV2 model with width scaled by 1.4 and input image size of 224. +Refer to the base class models.MobileNetV2 for more details.

+ +
+ Source code in mindcv/models/mobilenetv2.py, lines 262-273:
@register_model
+def mobilenet_v2_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.4 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_140"]
+    model = MobileNetV2(alpha=1.4, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
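Across all of the factories above, the width multiplier alpha never scales a channel count directly; every count is passed through make_divisible(c * alpha, round_nearest). The helper itself is defined elsewhere in mindcv.models, so the sketch below is an assumption: it uses the rounding rule conventionally paired with MobileNet-style models (round to the nearest multiple of the divisor, never dropping below 90% of the requested value) and takes the conventional 32-channel MobileNetV2 stem as the example value.

def make_divisible_sketch(v: float, divisor: int = 8, min_value: int = None) -> int:
    # Illustrative reimplementation; the real helper lives in mindcv.models.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # avoid shrinking a layer by more than ~10%
        new_v += divisor
    return new_v

print(make_divisible_sketch(32 * 0.35))  # 16 -> mobilenet_v2_035_* stem width
print(make_divisible_sketch(32 * 0.75))  # 24 -> mobilenet_v2_075* stem width
print(make_divisible_sketch(32 * 1.40))  # 48 -> mobilenet_v2_140 stem width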
+
+
+
+ +

mobilenetv3

+ + +
+ + + +

+ mindcv.models.mobilenetv3.MobileNetV3 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

MobileNetV3 model class, based on +"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>_

PARAMETERS:
    arch (str): size of the architecture, 'small' or 'large'.
    alpha (float): scale factor of model width. Default: 1.0
    round_nearest (int): divisor of the make_divisible function. Default: 8
    in_channels (int): number of the channels of the input. Default: 3
    num_classes (int): number of classification classes. Default: 1000

+ Source code in mindcv/models/mobilenetv3.py, lines 100-242:
class MobileNetV3(nn.Cell):
+    r"""MobileNetV3 model class, based on
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_
+
+    Args:
+        arch: size of the architecture. 'small' or 'large'.
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number of the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(16 * alpha, round_nearest)
+        # Setting of bottleneck blocks. ex: [k, e, c, se, nl, s]
+        # k: kernel size of depth-wise conv
+        # e: expansion size
+        # c: number of output channel
+        # se: whether there is a Squeeze-And-Excite in that block
+        # nl: type of non-linearity used
+        # s: stride of depth-wise conv
+        if arch == "large":
+            bottleneck_setting = [
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hswish", 2],
+                [3, 200, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 480, 112, True, "hswish", 1],
+                [3, 672, 112, True, "hswish", 1],
+                [5, 672, 160, True, "hswish", 2],
+                [5, 960, 160, True, "hswish", 1],
+                [5, 960, 160, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1280, round_nearest)
+        elif arch == "small":
+            bottleneck_setting = [
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hswish", 2],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 120, 48, True, "hswish", 1],
+                [5, 144, 48, True, "hswish", 1],
+                [5, 288, 96, True, "hswish", 2],
+                [5, 576, 96, True, "hswish", 1],
+                [5, 576, 96, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1024, round_nearest)
+        else:
+            raise ValueError(f"Unsupported model type {arch}")
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.HSwish(),
+        ]
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=input_channels, reduction=total_reduction, name=f'features.{len(features) - 1}')]
+
+        # Building bottleneck blocks.
+        for k, e, c, se, nl, s in bottleneck_setting:
+            exp_channels = make_divisible(alpha * e, round_nearest)
+            output_channels = make_divisible(alpha * c, round_nearest)
+            features.append(Bottleneck(input_channels, exp_channels, output_channels,
+                                       kernel_size=k, stride=s, activation=nl, use_se=se))
+            input_channels = output_channels
+
+            total_reduction *= s
+            self.feature_info.append(dict(chs=input_channels, reduction=total_reduction,
+                                          name=f'features.{len(features) - 1}'))
+
+        # Building last point-wise conv layers.
+        output_channels = input_channels * 6
+        features.extend([
+            nn.Conv2d(input_channels, output_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.HSwish(),
+        ])
+
+        self.feature_info.append(dict(chs=output_channels, reduction=total_reduction,
+                                      name=f'features.{len(features) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(output_channels, last_channels),
+            nn.HSwish(),
+            Dropout(p=0.2),
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
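Besides the classifier path, the constructor records a feature_info entry (output channels, accumulated stride, and layer name) after the stem and after every bottleneck, which is what feature-extraction wrappers rely on. A minimal sketch (the 224x224 dummy input is an assumption):

import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv3 import MobileNetV3

# arch selects the "small" or "large" bottleneck table shown above.
net = MobileNetV3(arch="small", alpha=1.0, num_classes=1000)

x = ms.Tensor(np.random.rand(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # (1, 1000)

# Channel count and total downsampling factor of each recorded stage.
for info in net.feature_info:
    print(info["name"], info["chs"], info["reduction"])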
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_large_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get large MobileNetV3 model with width scaled by 0.75. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py, lines 279-286:
@register_model
+def mobilenet_v3_large_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_075"]
+    model_args = dict(arch="large", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_large_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get large MobileNetV3 model without width scaling. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py, lines 259-266:
@register_model
+def mobilenet_v3_large_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_100"]
+    model_args = dict(arch="large", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_small_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get small MobileNetV3 model with width scaled by 0.75. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py, lines 269-276:
@register_model
+def mobilenet_v3_small_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_075"]
+    model_args = dict(arch="small", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilenetv3.mobilenet_v3_small_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get small MobileNetV3 model without width scaling. +Refer to the base class models.MobileNetV3 for more details.

+ +
+ Source code in mindcv/models/mobilenetv3.py, lines 249-256:
@register_model
+def mobilenet_v3_small_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_100"]
+    model_args = dict(arch="small", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
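All four factories are decorated with @register_model, so the function names double as registry identifiers. Assuming the package exposes the usual top-level create_model helper (it is not shown on this page), the by-name construction would be equivalent to calling the factory directly:

import mindcv

# Hypothetical but conventional usage; equivalent to
# mobilenet_v3_small_100(pretrained=False, num_classes=1000).
model = mindcv.create_model("mobilenet_v3_small_100", pretrained=False, num_classes=1000)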
+
+
+
+ +

mobilevit

+ + + +
+ + + +

+mindcv.models.mobilevit.mobilevit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py, lines 840-847:
@register_model
+def mobilevit_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilevit.mobilevit_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py, lines 830-837:
@register_model
+def mobilevit_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("x_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_x_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mobilevit.mobilevit_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mobilevit.py, lines 820-827:
@register_model
+def mobilevit_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("xx_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_xx_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
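The three MobileViT factories differ only in the configuration returned by get_config ("small", "x_small", "xx_small"); each builds a MobileViT and, with pretrained=True, loads the checkpoint named in default_cfgs. A brief construction sketch:

from mindcv.models.mobilevit import mobilevit_small, mobilevit_xx_small

# Same calling convention across the three sizes; only get_config(<size>) changes.
small = mobilevit_small(pretrained=False, num_classes=1000)
tiny = mobilevit_xx_small(pretrained=False, num_classes=100)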
+
+
+
+ +

nasnet

+ + +
+ + + +

+ mindcv.models.nasnet.NASNetAMobile + + +

+ + +
+

+ Bases: nn.Cell

+ + +

NasNet model class, based on +"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>_

PARAMETERS:
    num_classes (int): number of classification classes. Default: 1000
    stem_filters (int): number of stem filters. Default: 32
    penultimate_filters (int): number of penultimate filters. Default: 1056
    filters_multiplier (int): size of filters multiplier. Default: 2

+ Source code in mindcv/models/nasnet.py, lines 681-871:
class NASNetAMobile(nn.Cell):
+    r"""NasNet model class, based on
+    `"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>`_
+    Args:
+        num_classes: number of classification classes.
+        stem_filters: number of stem filters. Default: 32.
+        penultimate_filters: number of penultimate filters. Default: 1056.
+        filters_multiplier: size of filters multiplier. Default: 2.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        stem_filters: int = 32,
+        penultimate_filters: int = 1056,
+        filters_multiplier: int = 2,
+    ) -> None:
+        super().__init__()
+        self.stem_filters = stem_filters
+        self.penultimate_filters = penultimate_filters
+        self.filters_multiplier = filters_multiplier
+
+        filters = self.penultimate_filters // 24
+        # 24 is default value for the architecture
+
+        self.conv0 = nn.SequentialCell([
+            nn.Conv2d(in_channels=in_channels, out_channels=self.stem_filters, kernel_size=3, stride=2, pad_mode="pad",
+                      padding=0,
+                      has_bias=False),
+            nn.BatchNorm2d(num_features=self.stem_filters, eps=0.001, momentum=0.9, affine=True)
+        ])
+
+        self.cell_stem_0 = CellStem0(
+            self.stem_filters, num_filters=filters // (filters_multiplier ** 2)
+        )
+        self.cell_stem_1 = CellStem1(
+            self.stem_filters, num_filters=filters // filters_multiplier
+        )
+
+        self.cell_0 = FirstCell(
+            in_channels_left=filters,
+            out_channels_left=filters // 2,  # 1, 0.5
+            in_channels_right=2 * filters,
+            out_channels_right=filters,
+        )  # 2, 1
+        self.cell_1 = NormalCell(
+            in_channels_left=2 * filters,
+            out_channels_left=filters,  # 2, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_2 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_3 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+
+        self.reduction_cell_0 = ReductionCell0(
+            in_channels_left=6 * filters,
+            out_channels_left=2 * filters,  # 6, 2
+            in_channels_right=6 * filters,
+            out_channels_right=2 * filters,
+        )  # 6, 2
+
+        self.cell_6 = FirstCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=8 * filters,
+            out_channels_right=2 * filters,
+        )  # 8, 2
+        self.cell_7 = NormalCell(
+            in_channels_left=8 * filters,
+            out_channels_left=2 * filters,  # 8, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_8 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_9 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+
+        self.reduction_cell_1 = ReductionCell1(
+            in_channels_left=12 * filters,
+            out_channels_left=4 * filters,  # 12, 4
+            in_channels_right=12 * filters,
+            out_channels_right=4 * filters,
+        )  # 12, 4
+
+        self.cell_12 = FirstCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=16 * filters,
+            out_channels_right=4 * filters,
+        )  # 16, 4
+        self.cell_13 = NormalCell(
+            in_channels_left=16 * filters,
+            out_channels_left=4 * filters,  # 16, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_14 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_15 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+
+        self.relu = nn.ReLU()
+        self.dropout = Dropout(p=0.5)
+        self.classifier = nn.Dense(in_channels=24 * filters, out_channels=num_classes)
+        self.pool = GlobalAvgPooling()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        self.init_parameters_data()
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(init.initializer(init.Normal(math.sqrt(2. / n), 0),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x_conv0 = self.conv0(x)
+        x_stem_0 = self.cell_stem_0(x_conv0)
+        x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+        x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+        x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+        x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+        x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+        x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+        x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+        x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+        x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+        x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+        x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+        x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+        x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+        x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+        x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+        x_cell_15 = self.relu(x_cell_15)
+        return x_cell_15
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)  # global average pool
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.nasnet.NASNetAMobile.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/nasnet.py, lines 834-860:
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x_conv0 = self.conv0(x)
+    x_stem_0 = self.cell_stem_0(x_conv0)
+    x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+    x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+    x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+    x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+    x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+    x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+    x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+    x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+    x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+    x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+    x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+    x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+    x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+    x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+    x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+    x_cell_15 = self.relu(x_cell_15)
+    return x_cell_15
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.nasnet.nasnet_a_4x1056(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get NasNet model. +Refer to the base class models.NASNetAMobile for more details.

+ +
+ Source code in mindcv/models/nasnet.py, lines 874-882:
@register_model
+def nasnet_a_4x1056(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> NASNetAMobile:
+    """Get NasNet model.
+    Refer to the base class `models.NASNetAMobile` for more details."""
+    default_cfg = default_cfgs["nasnet_a_4x1056"]
+    model = NASNetAMobile(in_channels=in_channels, num_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
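nasnet_a_4x1056 is the single registered NASNet entry point here; it forwards num_classes and in_channels to NASNetAMobile and optionally loads the pretrained checkpoint. A minimal sketch (the 224x224 dummy input is an assumption):

import numpy as np
import mindspore as ms
from mindcv.models.nasnet import nasnet_a_4x1056

net = nasnet_a_4x1056(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.rand(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # (1, 1000)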
+
+
+
+ +

pit

+ + + +
+ + + +

+mindcv.models.pit.pit_b(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-B model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py, lines 475-496:
@register_model
+def pit_b(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-B model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_b"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=14,
+        stride=7,
+        base_dims=[64, 64, 64],
+        depth=[3, 6, 4],
+        heads=[4, 8, 16],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-S model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py, lines 451-472:
@register_model
+def pit_s(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-S model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_s"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[3, 6, 12],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_ti(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-Ti model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py, lines 403-424:
@register_model
+def pit_ti(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-Ti model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_ti"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[32, 32, 32],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pit.pit_xs(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PiT-XS model. +Refer to the base class models.PoolingTransformer for more details.

+ +
+ Source code in mindcv/models/pit.py, lines 427-448:
@register_model
+def pit_xs(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-XS model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_xs"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
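The four PiT factories share one PoolingTransformer template and differ only in patch_size/stride, base_dims, depth, and heads. A usage sketch (the 224x224 input matches the image_size=224 passed in every factory):

import numpy as np
import mindspore as ms
from mindcv.models.pit import pit_ti, pit_b

net = pit_ti(pretrained=False, num_classes=1000)  # base_dims=[32, 32, 32], heads=[2, 4, 8]
big = pit_b(pretrained=False, num_classes=1000)   # patch_size=14, stride=7, heads=[4, 8, 16]

x = ms.Tensor(np.random.rand(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # (1, 1000)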
+
+
+
+ +

poolformer

+ + +
+ + + +

+ mindcv.models.poolformer.PoolFormer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

PoolFormer model class, based on +"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>_

PARAMETERS:
    layers: number of blocks for the 4 stages.
    embed_dims: the embedding dims for the 4 stages. Default: (64, 128, 320, 512)
    mlp_ratios: mlp ratios for the 4 stages. Default: (4, 4, 4, 4)
    downsamples: flags to apply downsampling or not. Default: (True, True, True, True)
    pool_size: the pooling size for the 4 stages. Default: 3
    in_chans: number of input channels. Default: 3
    num_classes: number of classes for image classification. Default: 1000
    global_pool: type of the global pooling layer. Default: 'avg'
    norm_layer: type of normalization layer. Default: nn.GroupNorm
    act_layer: type of activation layer. Default: nn.GELU
    in_patch_size: patch size of the input-image patch embedding. Default: 7
    in_stride: stride of the input-image patch embedding. Default: 4
    in_pad: padding of the input-image patch embedding. Default: 2
    down_patch_size: patch size of the downsampling patch embeddings. Default: 3
    down_stride: stride of the downsampling patch embeddings. Default: 2
    down_pad: padding of the downsampling patch embeddings. Default: 1
    drop_rate: dropout rate of the layer before the main classifier. Default: 0.0
    drop_path_rate: stochastic depth rate. Default: 0.0
    layer_scale_init_value: initial value for LayerScale. Default: 1e-5
    fork_feat: whether to output the features of the 4 stages (for dense prediction). Default: False

+ Source code in mindcv/models/poolformer.py, lines 204-321:
class PoolFormer(nn.Cell):
+    r"""PoolFormer model class, based on
+    `"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>`_
+
+    Args:
+        layers: number of blocks for the 4 stages
+        embed_dims: the embedding dims for the 4 stages. Default: (64, 128, 320, 512)
+        mlp_ratios: mlp ratios for the 4 stages. Default: (4, 4, 4, 4)
+        downsamples: flags to apply downsampling or not. Default: (True, True, True, True)
+        pool_size: the pooling size for the 4 stages. Default: 3
+        in_chans: number of input channels. Default: 3
+        num_classes: number of classes for the image classification. Default: 1000
+        global_pool: define the types of pooling layer. Default: avg
+        norm_layer: define the types of normalization. Default: nn.GroupNorm
+        act_layer: define the types of activation. Default: nn.GELU
+        in_patch_size: specify the patch embedding for the input image. Default: 7
+        in_stride: specify the stride for the input image. Default: 4.
+        in_pad: specify the pad for the input image. Default: 2.
+        down_patch_size: specify the downsample. Default: 3.
+        down_stride: specify the downsample (patch embed.). Default: 2.
+        down_pad: specify the downsample (patch embed.). Default: 1.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: LayerScale. Default: 1e-5.
+        fork_feat: whether output features of the 4 stages, for dense prediction. Default: False.
+    """
+
+    def __init__(
+        self,
+        layers,
+        embed_dims=(64, 128, 320, 512),
+        mlp_ratios=(4, 4, 4, 4),
+        downsamples=(True, True, True, True),
+        pool_size=3,
+        in_chans=3,
+        num_classes=1000,
+        global_pool="avg",
+        norm_layer=nn.GroupNorm,
+        act_layer=nn.GELU,
+        in_patch_size=7,
+        in_stride=4,
+        in_pad=2,
+        down_patch_size=3,
+        down_stride=2,
+        down_pad=1,
+        drop_rate=0.0,
+        drop_path_rate=0.0,
+        layer_scale_init_value=1e-5,
+        fork_feat=False,
+    ):
+        super().__init__()
+
+        if not fork_feat:
+            self.num_classes = num_classes
+        self.fork_feat = fork_feat
+
+        self.global_pool = global_pool
+        self.num_features = embed_dims[-1]
+        self.grad_checkpointing = False
+
+        self.patch_embed = PatchEmbed(
+            patch_size=in_patch_size, stride=in_stride, padding=in_pad,
+            in_chs=in_chans, embed_dim=embed_dims[0])
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            network.append(basic_blocks(
+                embed_dims[i], i, layers,
+                pool_size=pool_size, mlp_ratio=mlp_ratios[i],
+                act_layer=act_layer, norm_layer=norm_layer,
+                drop_rate=drop_rate, drop_path_rate=drop_path_rate,
+                layer_scale_init_value=layer_scale_init_value)
+            )
+            if i < len(layers) - 1 and (downsamples[i] or embed_dims[i] != embed_dims[i + 1]):
+                # downsampling between stages
+                network.append(PatchEmbed(
+                    in_chs=embed_dims[i], embed_dim=embed_dims[i + 1],
+                    patch_size=down_patch_size, stride=down_stride, padding=down_pad)
+                )
+
+        self.network = nn.SequentialCell(*network)
+        self.norm = norm_layer(1, embed_dims[-1])
+        self.head = nn.Dense(embed_dims[-1], num_classes, has_bias=True) if num_classes > 0 else Identity()
+        # self._initialize_weights()
+        self.cls_init_weights()
+
+    def cls_init_weights(self):
+        """Initialize weights for cells."""
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+                if m.bias is not None:
+                    m.bias.set_data(
+                        init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))
+
+    def reset_classifier(self, num_classes, global_pool=None):
+        self.num_classes = num_classes
+        if global_pool is not None:
+            self.global_pool = global_pool
+        self.head = nn.Dense(self.num_features, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.network(x)
+        if self.fork_feat:
+            # output features of the four stages for dense prediction
+            return x
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x.mean([-2, -1]))
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+
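+
+A minimal construction sketch for the class above (illustrative only, not part of the generated reference; the input shape is an assumption):
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.poolformer import PoolFormer
+
+# four stages of (2, 2, 6, 2) blocks with the default embedding dims (64, 128, 320, 512)
+net = PoolFormer(layers=(2, 2, 6, 2), num_classes=1000)
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+logits = net(x)  # construct() -> forward_features() -> forward_head()
+print(logits.shape)  # expected: (1, 1000)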

+mindcv.models.poolformer.PoolFormer.cls_init_weights()
+
+Initialize weights for cells.
+
+ Source code in mindcv/models/poolformer.py
def cls_init_weights(self):
+    """Initialize weights for cells."""
+    for name, m in self.cells_and_names():
+        if isinstance(m, nn.Dense):
+            m.weight.set_data(
+                init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+            if m.bias is not None:
+                m.bias.set_data(
+                    init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_m36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_m36 model. Refer to the base class models.PoolFormer for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_m36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m36"]
+    layers = (6, 6, 18, 6)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_m48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_m48 model. Refer to the base class models.PoolFormer for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_m48(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m48 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m48"]
+    layers = (8, 8, 24, 8)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_s12(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s12 model. Refer to the base class models.PoolFormer for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s12(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s12 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s12"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(2, 2, 6, 2), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_s24(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s24 model. Refer to the base class models.PoolFormer for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s24(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s24 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s24"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(4, 4, 12, 4), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.poolformer.poolformer_s36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get poolformer_s36 model. Refer to the base class models.PoolFormer for more details.
+
+ Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s36"]
+    model = PoolFormer(
+        in_chans=in_channels, num_classes=num_classes, layers=(6, 6, 18, 6), layer_scale_init_value=1e-6, **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
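+
+A short usage sketch for the creator functions above (the batch size and image size are assumptions; pretrained=True would additionally call load_pretrained with the corresponding default_cfg):
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.poolformer import poolformer_s12
+
+net = poolformer_s12(pretrained=False, num_classes=1000, in_channels=3)
+net.set_train(False)  # inference mode
+images = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
+print(net(images).shape)  # expected: (2, 1000)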
+
+
+ +

pvt

+ + +
+ + + +

+ mindcv.models.pvt.PyramidVisionTransformer
+
+ Bases: nn.Cell
+
+Pyramid Vision Transformer model class, based on
+"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>
+
+PARAMETER               DESCRIPTION
+img_size (int)          size of an input image.
+patch_size (int)        size of a single image patch. Default: 4
+in_chans (int)          number of channels of the input. Default: 3
+num_classes (int)       number of classification classes. Default: 1000
+embed_dims (list)       hidden dimension of each PatchEmbed. Default: [64, 128, 320, 512]
+num_heads (list)        number of attention heads in each stage. Default: [1, 2, 5, 8]
+mlp_ratios (list)       ratios of MLP hidden dims in each stage. Default: [8, 8, 4, 4]
+qkv_bias (bool)         use bias in attention.
+qk_scale (float)        scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5.
+drop_rate (float)       the drop rate for each block. Default: 0.0
+attn_drop_rate (float)  the drop rate for attention. Default: 0.0
+drop_path_rate (float)  the drop rate for drop path. Default: 0.0
+norm_layer (nn.Cell)    norm layer that will be used in blocks. Default: nn.LayerNorm
+depths (list)           number of blocks in each stage. Default: [2, 2, 2, 2]
+sr_ratios (list)        stride and kernel size of each attention.
+num_stages (int)        number of stages. Default: 4
+
+ Source code in mindcv/models/pvt.py
class PyramidVisionTransformer(nn.Cell):
+    r"""Pyramid Vision Transformer model class, based on
+    `"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>`_  # noqa: E501
+
+    Args:
+        img_size (int): size of an input image.
+        patch_size (int): size of a single image patch.
+        in_chans (int): number of channels of the input. Default: 3.
+        num_classes (int): number of classification classes. Default: 1000.
+        embed_dims (list): hidden dimension of each PatchEmbed.
+        num_heads (list): number of attention heads in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias (bool): use bias in attention.
+        qk_scale (float): scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5.
+        drop_rate (float): the drop rate for each block. Default: 0.0.
+        attn_drop_rate (float): the drop rate for attention. Default: 0.0.
+        drop_path_rate (float): the drop rate for drop path. Default: 0.0.
+        norm_layer (nn.Cell): norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list): number of blocks in each stage.
+        sr_ratios (list): stride and kernel size of each attention.
+        num_stages (int): number of stages. Default: 4.
+    """
+
+    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 320, 512],
+                 num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True, qk_scale=None, drop_rate=0.0,
+                 attn_drop_rate=0.0, drop_path_rate=0.0, norm_layer=nn.LayerNorm,
+                 depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], num_stages=4):
+        super(PyramidVisionTransformer, self).__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        b_list = []
+        self.pos_embed = []
+        self.pos_drop = Dropout(p=drop_rate)
+        for i in range(num_stages):
+            block = nn.CellList(
+                [Block(dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                       qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j],
+                       norm_layer=norm_layer, sr_ratio=sr_ratios[i])
+                 for j in range(depths[i])
+                 ])
+
+            b_list.append(block)
+            cur += depths[i]  # advance the stochastic-depth index by this stage's depth
+
+        self.patch_embed1 = PatchEmbed(img_size=img_size,
+                                       patch_size=patch_size,
+                                       in_chans=in_chans,
+                                       embed_dim=embed_dims[0])
+        num_patches = self.patch_embed1.num_patches
+        self.pos_embed1 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[0]), mindspore.float16))
+        self.pos_drop1 = Dropout(p=drop_rate)
+
+        self.patch_embed2 = PatchEmbed(img_size=img_size // (2 ** (1 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[1 - 1],
+                                       embed_dim=embed_dims[1])
+        num_patches = self.patch_embed2.num_patches
+        self.pos_embed2 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[1]), mindspore.float16))
+        self.pos_drop2 = Dropout(p=drop_rate)
+
+        self.patch_embed3 = PatchEmbed(img_size=img_size // (2 ** (2 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[2 - 1],
+                                       embed_dim=embed_dims[2])
+        num_patches = self.patch_embed3.num_patches
+        self.pos_embed3 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[2]), mindspore.float16))
+        self.pos_drop3 = Dropout(p=drop_rate)
+
+        self.patch_embed4 = PatchEmbed(img_size // (2 ** (3 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[3 - 1],
+                                       embed_dim=embed_dims[3])
+        num_patches = self.patch_embed4.num_patches + 1
+        self.pos_embed4 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[3]), mindspore.float16))
+        self.pos_drop4 = Dropout(p=drop_rate)
+        self.Blocks = nn.CellList(b_list)
+
+        self.norm = norm_layer([embed_dims[3]])
+
+        # cls_token
+        self.cls_token = mindspore.Parameter(ops.zeros((1, 1, embed_dims[3]), mindspore.float32))
+
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self.reshape = ops.reshape
+        self.transpose = ops.transpose
+        self.tile = ops.Tile()
+        self.Concat = ops.Concat(axis=1)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def _get_pos_embed(self, pos_embed, ph, pw, H, W):
+        if H * W == self.patch_embed1.num_patches:
+            return pos_embed
+        else:
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, ph, pw, -1)), (0, 3, 1, 2))
+            resize_bilinear = ops.ResizeBilinear((H, W))
+            pos_embed = resize_bilinear(pos_embed)
+
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, -1, H * W)), (0, 2, 1))
+
+            return pos_embed
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        x, (H, W) = self.patch_embed1(x)
+        pos_embed = self.pos_embed1
+        x = self.pos_drop1(x + pos_embed)
+        for blk in self.Blocks[0]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed2(x)
+        ph, pw = self.patch_embed2.H, self.patch_embed2.W
+        pos_embed = self._get_pos_embed(self.pos_embed2, ph, pw, H, W)
+        x = self.pos_drop2(x + pos_embed)
+        for blk in self.Blocks[1]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed3(x)
+        ph, pw = self.patch_embed3.H, self.patch_embed3.W
+        pos_embed = self._get_pos_embed(self.pos_embed3, ph, pw, H, W)
+        x = self.pos_drop3(x + pos_embed)
+        for blk in self.Blocks[2]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed4(x)
+        cls_tokens = self.tile(self.cls_token, (B, 1, 1))
+
+        x = self.Concat((cls_tokens, x))
+        ph, pw = self.patch_embed4.H, self.patch_embed4.W
+        pos_embed_ = self._get_pos_embed(self.pos_embed4[:, 1:], ph, pw, H, W)
+        pos_embed = self.Concat((self.pos_embed4[:, 0:1], pos_embed_))
+        x = self.pos_drop4(x + pos_embed)
+        for blk in self.Blocks[3]:
+            x = blk(x, H, W)
+
+        x = self.norm(x)
+
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x
+
+
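+
+A minimal instantiation sketch (assumed configuration, mirroring the pvt_tiny preset below): the four stages are driven by the depths and sr_ratios lists, and forward_features() returns the cls-token feature that feeds the Dense head.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.pvt import PyramidVisionTransformer
+
+net = PyramidVisionTransformer(img_size=224, depths=[2, 2, 2, 2])
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+print(net(x).shape)  # expected: (1, 1000)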
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.pvt.pvt_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT large model. Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_large(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT large model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_large']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvt.pvt_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT medium model. Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_medium(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT medium model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_medium']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvt.pvt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT small model. Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_small(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT small model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_small']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvt.pvt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVT tiny model. Refer to the base class "models.PVT" for more details.
+
+ Source code in mindcv/models/pvt.py
@register_model
+def pvt_tiny(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT tiny model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_tiny']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
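+
+A usage sketch for the PVT creators above (hypothetical fine-tuning setup, for illustration only):
+
+from mindcv.models.pvt import pvt_tiny
+
+# num_classes is forwarded to the PyramidVisionTransformer constructor,
+# so the classification head is built with the requested output width.
+net = pvt_tiny(pretrained=False, num_classes=10)
+print(net.get_classifier())  # nn.Dense mapping embed_dims[3] -> 10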
+
+
+ +

pvtv2

+ + +
+ + + +

+ mindcv.models.pvtv2.PyramidVisionTransformerV2
+
+ Bases: nn.Cell
+
+Pyramid Vision Transformer V2 model class, based on
+"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>
+
+PARAMETER               DESCRIPTION
+img_size (int)          size of an input image.
+patch_size (int)        size of a single image patch. Default: 16
+in_chans (int)          number of channels of the input. Default: 3
+num_classes (int)       number of classification classes. Default: 1000
+embed_dims (list)       hidden dimension of each PatchEmbed. Default: [64, 128, 256, 512]
+num_heads (list)        number of attention heads in each stage. Default: [1, 2, 4, 8]
+mlp_ratios (list)       ratios of MLP hidden dims in each stage. Default: [4, 4, 4, 4]
+qkv_bias (bool)         use bias in attention.
+qk_scale (float)        scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5.
+drop_rate (float)       the drop rate for each block. Default: 0.0
+attn_drop_rate (float)  the drop rate for attention. Default: 0.0
+drop_path_rate (float)  the drop rate for drop path. Default: 0.0
+norm_layer (nn.Cell)    norm layer that will be used in blocks. Default: nn.LayerNorm
+depths (list)           number of blocks in each stage. Default: [3, 4, 6, 3]
+sr_ratios (list)        stride and kernel size of each attention.
+num_stages (int)        number of stages. Default: 4
+linear (bool)           use linear SRA.
+
+ Source code in mindcv/models/pvtv2.py
class PyramidVisionTransformerV2(nn.Cell):
+    r"""Pyramid Vision Transformer V2 model class, based on
+    `"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>`_
+
+    Args:
+        img_size (int): size of an input image.
+        patch_size (int): size of a single image patch.
+        in_chans (int): number of channels of the input. Default: 3.
+        num_classes (int): number of classification classes. Default: 1000.
+        embed_dims (list): hidden dimension of each PatchEmbed.
+        num_heads (list): number of attention heads in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias (bool): use bias in attention.
+        qk_scale (float): scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5.
+        drop_rate (float): the drop rate for each block. Default: 0.0.
+        attn_drop_rate (float): the drop rate for attention. Default: 0.0.
+        drop_path_rate (float): the drop rate for drop path. Default: 0.0.
+        norm_layer (nn.Cell): norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list): number of blocks in each stage.
+        sr_ratios (list): stride and kernel size of each attention.
+        num_stages (int): number of stages. Default: 4.
+        linear (bool): use linear SRA.
+    """
+
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4, linear=False):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+
+        patch_embed_list = []
+        block_list = []
+        norm_list = []
+
+        for i in range(num_stages):
+            patch_embed = OverlapPatchEmbed(img_size=img_size if i == 0 else img_size // (2 ** (i + 1)),
+                                            patch_size=7 if i == 0 else 3,
+                                            stride=4 if i == 0 else 2,
+                                            in_chans=in_chans if i == 0 else embed_dims[i - 1],
+                                            embed_dim=embed_dims[i])
+
+            block = nn.CellList([Block(
+                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,
+                sr_ratio=sr_ratios[i], linear=linear, block_id=j)
+                for j in range(depths[i])])
+
+            norm = norm_layer([embed_dims[i]])
+
+            cur += depths[i]
+
+            patch_embed_list.append(patch_embed)
+            block_list.append(block)
+            norm_list.append(norm)
+        self.patch_embed_list = nn.CellList(patch_embed_list)
+        self.block_list = nn.CellList(block_list)
+        self.norm_list = nn.CellList(norm_list)
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def freeze_patch_emb(self):
+        self.patch_embed_list[0].requires_grad = False
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        for i in range(self.num_stages):
+            patch_embed = self.patch_embed_list[i]
+            block = self.block_list[i]
+            norm = self.norm_list[i]
+            x, H, W = patch_embed(x)
+            for blk in block:
+                x = blk(x, H, W)
+            x = norm(x)
+            if i != self.num_stages - 1:
+                x = ops.transpose(ops.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        return x.mean(axis=1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x
+
+
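+
+A construction sketch for the class above (argument values are assumptions): PVTv2 builds each stage from OverlapPatchEmbed instead of the fixed position embeddings used by PVT, and the linear flag switches the attention to the linear SRA variant.
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.pvtv2 import PyramidVisionTransformerV2
+
+net = PyramidVisionTransformerV2(depths=[2, 2, 2, 2], linear=False)
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+print(net(x).shape)  # expected: (1, 1000)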
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b0 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b0(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b0 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b0"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b1 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b1(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b1 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b1"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b2 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b2(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b2 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b2"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b3 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b3(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b3 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b3"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b4 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b4(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b4 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b4"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get PVTV2-b5 model. Refer to the base class "models.PVTv2" for more details.
+
+ Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b5(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b5 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b5"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
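+
+A short sketch for the b0-b5 creators above (the single-channel input is an assumed example; pretrained checkpoints target in_channels=3):
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.pvtv2 import pvt_v2_b0
+
+# in_channels is forwarded to the constructor, so non-RGB inputs are supported when training from scratch
+net = pvt_v2_b0(pretrained=False, num_classes=100, in_channels=1)
+x = ms.Tensor(np.random.randn(4, 1, 224, 224), ms.float32)
+print(net(x).shape)  # expected: (4, 100)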
+
+
+ +

regnet

+ + + +
+ + + +

+mindcv.models.regnet.regnet_x_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_16gf"]
+    model = RegNet(55.59, 216, 2.1, 22, 128, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_1_6gf"]
+    model = RegNet(34.01, 80, 2.25, 18, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_32gf"]
+    model = RegNet(69.86, 320, 2.0, 23, 168, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_3_2gf"]
+    model = RegNet(26.31, 88, 2.25, 25, 48, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_400mf"]
+    model = RegNet(24.48, 24, 2.54, 22, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_4_0gf"]
+    model = RegNet(38.65, 96, 2.43, 23, 40, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_600mf"]
+    model = RegNet(36.97, 48, 2.24, 16, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_6_4gf"]
+    model = RegNet(60.83, 184, 2.07, 17, 56, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_800mf"]
+    model = RegNet(35.73, 56, 2.28, 16, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_x_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_8_0gf"]
+    model = RegNet(49.56, 80, 2.88, 23, 120, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_16gf"]
+    model = RegNet(106.23, 200, 2.48, 18, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_1_6gf"]
+    model = RegNet(20.71, 48, 2.65, 27, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_32gf"]
+    model = RegNet(115.89, 232, 2.53, 20, 232, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_3_2gf"]
+    model = RegNet(42.63, 80, 2.66, 21, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_400mf"]
+    model = RegNet(27.89, 48, 2.09, 16, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_4_0gf"]
+    model = RegNet(31.41, 96, 2.24, 22, 64, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_600mf"]
+    model = RegNet(32.54, 48, 2.32, 15, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_6_4gf"]
+    model = RegNet(33.22, 112, 2.27, 25, 72, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_800mf"]
+    model = RegNet(38.84, 56, 2.4, 14, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/regnet.py
@register_model
+def regnet_y_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_8_0gf"]
+    model = RegNet(76.82, 192, 2.19, 17, 56, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
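+
+The y-variants above differ from the x-variants only in passing se_r=0.25 (squeeze-and-excitation) to RegNet. A brief usage sketch (input shape is an assumption):
+
+import numpy as np
+import mindspore as ms
+from mindcv.models.regnet import regnet_y_800mf
+
+net = regnet_y_800mf(pretrained=False, num_classes=1000, in_channels=3)
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+print(net(x).shape)  # expected: (1, 1000)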
+
+
+ +

repmlp

+ + +
+ + + +

+ mindcv.models.repmlp.RepMLPNet
+
+ Bases: nn.Cell
+
+RepMLPNet model class, based on
+"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>
+
+PARAMETER                DESCRIPTION
+in_channels              number of input channels. Default: 3
+num_class                number of classification classes. Default: 1000
+patch_size               size of a single image patch. Default: (4, 4)
+num_blocks               number of blocks per stage. Default: (2, 2, 6, 2)
+channels                 number of in_channels (channels[stage_idx]) and out_channels (channels[stage_idx + 1]) per stage. Default: (192, 384, 768, 1536)
+hs                       height of the picture per stage. Default: (64, 32, 16, 8)
+ws                       width of the picture per stage. Default: (64, 32, 16, 8)
+sharesets_nums           number of share sets per stage. Default: (4, 8, 16, 32)
+reparam_conv_k           convolution kernel sizes in the local perceptron. Default: (3,)
+globalperceptron_reduce  intermediate convolution output reduction in the global perceptron (out_channel = in_channel / globalperceptron_reduce). Default: 4
+use_checkpoint           whether to use checkpointing. Default: False
+deploy                   whether to use bias. Default: False
+
+ Source code in mindcv/models/repmlp.py
class RepMLPNet(nn.Cell):
+    r"""RepMLPNet model class, based on
+    `"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>`_
+
+    Args:
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        patch_size: size of a single image patch. Default: (4, 4)
+        num_blocks: number of blocks per stage. Default: (2,2,6,2)
+        channels: number of in_channels(channels[stage_idx]) and out_channels(channels[stage_idx + 1]) per stage.
+            Default: (192,384,768,1536)
+        hs: height of picture per stage. Default: (64,32,16,8)
+        ws: width of picture per stage. Default: (64,32,16,8)
+        sharesets_nums: number of share sets per stage. Default: (4,8,16,32)
+        reparam_conv_k: convolution kernel size in local Perceptron. Default: (3,)
+        globalperceptron_reduce: channel reduction ratio of the intermediate convolution in the global perceptron
+            (out_channel = in_channel / globalperceptron_reduce). Default: 4
+        use_checkpoint: whether to use checkpointing to save memory. Default: False
+        deploy: whether to build the re-parameterized structure for deployment. Default: False
+    """
+
+    def __init__(self,
+                 in_channels=3, num_class=1000,
+                 patch_size=(4, 4),
+                 num_blocks=(2, 2, 6, 2), channels=(192, 384, 768, 1536),
+                 hs=(64, 32, 16, 8), ws=(64, 32, 16, 8),
+                 sharesets_nums=(4, 8, 16, 32),
+                 reparam_conv_k=(3,),
+                 globalperceptron_reduce=4, use_checkpoint=False,
+                 deploy=False):
+        super().__init__()
+        num_stages = len(num_blocks)
+        assert num_stages == len(channels)
+        assert num_stages == len(hs)
+        assert num_stages == len(ws)
+        assert num_stages == len(sharesets_nums)
+
+        self.conv_embedding = conv_bn_relu(in_channels, channels[0], kernel_size=patch_size, stride=patch_size,
+                                           padding=0, has_bias=False)
+        self.conv2d = nn.Conv2d(in_channels, channels[0], kernel_size=patch_size, stride=patch_size, padding=0)
+
+        stages = []
+        embeds = []
+        for stage_idx in range(num_stages):
+            stage_blocks = [RepMLPNetUnit(channels=channels[stage_idx], h=hs[stage_idx], w=ws[stage_idx],
+                                          reparam_conv_k=reparam_conv_k,
+                                          globalperceptron_reduce=globalperceptron_reduce, ffn_expand=4,
+                                          num_sharesets=sharesets_nums[stage_idx],
+                                          deploy=deploy) for _ in range(num_blocks[stage_idx])]
+            stages.append(nn.CellList(stage_blocks))
+            if stage_idx < num_stages - 1:
+                embeds.append(
+                    conv_bn_relu(in_channels=channels[stage_idx], out_channels=channels[stage_idx + 1], kernel_size=2,
+                                 stride=2, padding=0))
+        self.stages = nn.CellList(stages)
+        self.embeds = nn.CellList(embeds)
+        self.head_norm = nn.BatchNorm2d(channels[-1]).set_train()
+        self.head = nn.Dense(channels[-1], num_class)
+
+        self.use_checkpoint = use_checkpoint
+        self.shape = ops.Shape()
+        self.reshape = ops.Reshape()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                k = cell.group / (cell.in_channels * cell.kernel_size[0] * cell.kernel_size[1])
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                k = 1 / cell.in_channels
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_embedding(x)
+
+        for i, stage in enumerate(self.stages):
+            for block in stage:
+                x = block(x)
+
+            if i < len(self.stages) - 1:
+                embed = self.embeds[i]
+                x = embed(x)
+        x = self.head_norm(x)
+        shape = self.shape(x)
+        pool = nn.AvgPool2d(kernel_size=(shape[2], shape[3]))
+        x = pool(x)
+        return x.view(shape[0], -1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
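A minimal sketch of constructing the class directly (illustrative only). Note that the constructor keyword is num_class, not num_classes, and that the default patch_size=(4, 4) with hs/ws starting at 64 implies a 256x256 input:

import numpy as np
import mindspore as ms
from mindcv.models.repmlp import RepMLPNet

# Defaults: patch_size=(4, 4) and hs[0]=ws[0]=64, so the expected input resolution is 256x256.
net = RepMLPNet(in_channels=3, num_class=1000)
x = ms.Tensor(np.random.randn(1, 3, 256, 256), ms.float32)
out = net(x)
print(out.shape)  # expected: (1, 1000)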

+mindcv.models.repmlp.repmlp_b224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b224 model. +Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
@register_model
+def repmlp_b224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b224 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_b256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b256 model. +Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
@register_model
+def repmlp_b256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_d256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_d256 model. +Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
@register_model
+def repmlp_d256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_d256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_d256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(80, 160, 320, 640), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_l256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_l256 model. +Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
@register_model
+def repmlp_l256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_l256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_l256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 256),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t224 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
@register_model
+def repmlp_t224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t224 model. Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t256 model. +Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
@register_model
+def repmlp_t256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
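Within one size, the factories above differ only in the target resolution: the *_224 variants use hs/ws starting at 56 (224x224 inputs) while the *_256 variants start at 64 (256x256 inputs). A short illustrative sketch:

import numpy as np
import mindspore as ms
from mindcv.models.repmlp import repmlp_t224, repmlp_t256

t224 = repmlp_t224(pretrained=False, num_classes=1000)
t256 = repmlp_t256(pretrained=False, num_classes=1000)

# Each variant expects the resolution encoded in its name.
y224 = t224(ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32))
y256 = t256(ms.Tensor(np.random.randn(1, 3, 256, 256), ms.float32))
print(y224.shape, y256.shape)  # expected: (1, 1000) (1, 1000)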

repvgg

+ + +
+ + + +

+ mindcv.models.repvgg.RepVGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

RepVGG model class, based on +"RepVGG: Making VGG-style ConvNets Great Again" <https://arxiv.org/pdf/2101.03697>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_blocks +
+

number of RepVGG blocks in each of the four stages.

+
+

+ + TYPE: + list + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
width_multiplier +
+

per-stage channel width multipliers (four values).

+
+

+ + TYPE: + list + + + DEFAULT: + None + +

+
override_group_map +
+

mapping from the running block index to the number of groups used in that block.

+
+

+ + TYPE: + dict + + + DEFAULT: + None + +

+
deploy +
+

use rbr_reparam block or not. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
use_se +
+

use se_block or not. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/repvgg.py +
201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
class RepVGG(nn.Cell):
+    r"""RepVGG model class, based on
+    `"RepVGG: Making VGG-style ConvNets Great Again" <https://arxiv.org/pdf/2101.03697>`_
+
+    Args:
+        num_blocks (list): number of RepVGG blocks in each of the four stages.
+        num_classes (int): number of classification classes. Default: 1000.
+        in_channels (int): number of input channels. Default: 3.
+        width_multiplier (list): per-stage channel width multipliers (four values).
+        override_group_map (dict): mapping from the running block index to the number of groups used in that block.
+        deploy (bool): use rbr_reparam block or not. Default: False
+        use_se (bool): use se_block or not. Default: False
+    """
+
+    def __init__(self, num_blocks, num_classes=1000, in_channels=3, width_multiplier=None, override_group_map=None,
+                 deploy=False, use_se=False):
+        super().__init__()
+
+        assert len(width_multiplier) == 4
+
+        self.deploy = deploy
+        self.override_group_map = override_group_map or {}
+        self.use_se = use_se
+
+        assert 0 not in self.override_group_map
+
+        self.in_planes = min(64, int(64 * width_multiplier[0]))
+
+        self.stage0 = RepVGGBlock(in_channels=in_channels, out_channels=self.in_planes, kernel_size=3, stride=2,
+                                  padding=1,
+                                  deploy=self.deploy, use_se=self.use_se)
+        self.feature_info = [dict(chs=self.in_planes, reduction=2, name="stage0")]
+        self.cur_layer_idx = 1
+        self.stage1 = self._make_stage(
+            int(64 * width_multiplier[0]), num_blocks[0], stride=2)
+        self.feature_info.append(dict(chs=int(64 * width_multiplier[0]), reduction=4, name="stage1"))
+        self.stage2 = self._make_stage(
+            int(128 * width_multiplier[1]), num_blocks[1], stride=2)
+        self.feature_info.append(dict(chs=int(128 * width_multiplier[1]), reduction=8, name="stage2"))
+        self.stage3 = self._make_stage(
+            int(256 * width_multiplier[2]), num_blocks[2], stride=2)
+        self.feature_info.append(dict(chs=int(256 * width_multiplier[2]), reduction=16, name="stage3"))
+        self.stage4 = self._make_stage(
+            int(512 * width_multiplier[3]), num_blocks[3], stride=2)
+        self.feature_info.append(dict(chs=int(512 * width_multiplier[3]), reduction=32, name="stage4"))
+        self.gap = GlobalAvgPooling()
+        self.linear = nn.Dense(int(512 * width_multiplier[3]), num_classes)
+        self._initialize_weights()
+
+    def _make_stage(self, planes, num_blocks, stride):
+        strides = [stride] + [1] * (num_blocks - 1)
+        blocks = []
+        for s in strides:
+            cur_group = self.override_group_map.get(self.cur_layer_idx, 1)
+            blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,
+                                      stride=s, padding=1, group=cur_group, deploy=self.deploy,
+                                      use_se=self.use_se))
+            self.in_planes = planes
+            self.cur_layer_idx += 1
+
+        return nn.SequentialCell(blocks)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def construct(self, x):
+        x = self.stage0(x)
+        x = self.stage1(x)
+        x = self.stage2(x)
+        x = self.stage3(x)
+        x = self.stage4(x)
+        x = self.gap(x)
+        x = self.linear(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
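The override_group_map argument is consulted per block in _make_stage (self.override_group_map.get(self.cur_layer_idx, 1)), i.e. it maps the running block index (stage0 is index 0 and must not appear) to a group count. A short illustrative sketch with an arbitrary, hypothetical group map:

import numpy as np
import mindspore as ms
from mindcv.models.repvgg import RepVGG

# Hypothetical map: use 2 groups in blocks 2 and 4, and the default of 1 group everywhere else.
group_map = {2: 2, 4: 2}

net = RepVGG(num_blocks=[2, 4, 14, 1], num_classes=1000, in_channels=3,
             width_multiplier=[0.75, 0.75, 0.75, 2.5], override_group_map=group_map)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)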

+mindcv.models.repvgg.repvgg_a0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
298
+299
+300
+301
+302
+303
+304
+305
+306
@register_model
+def repvgg_a0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_a0"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[0.75, 0.75, 0.75, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
309
+310
+311
+312
+313
+314
+315
+316
+317
@register_model
+def repvgg_a1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a1"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
320
+321
+322
+323
+324
+325
+326
+327
+328
@register_model
+def repvgg_a2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a2"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.5, 1.5, 1.5, 2.75], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
331
+332
+333
+334
+335
+336
+337
+338
+339
@register_model
+def repvgg_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b0']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
342
+343
+344
+345
+346
+347
+348
+349
+350
@register_model
+def repvgg_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b1']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
380
+381
+382
+383
+384
+385
+386
+387
+388
@register_model
+def repvgg_b1g2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g2"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g2_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
391
+392
+393
+394
+395
+396
+397
+398
+399
@register_model
+def repvgg_b1g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
353
+354
+355
+356
+357
+358
+359
+360
+361
@register_model
+def repvgg_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b2']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
402
+403
+404
+405
+406
+407
+408
+409
+410
@register_model
+def repvgg_b2g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b2g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0]. +Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
364
+365
+366
+367
+368
+369
+370
+371
+372
@register_model
+def repvgg_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b3']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[3.0, 3.0, 3.0, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
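To make the width_multiplier values above concrete: the stage widths follow directly from RepVGG.__init__ as int(64 * w0), int(128 * w1), int(256 * w2), int(512 * w3), with stage0 additionally capped at min(64, int(64 * w0)). A small arithmetic sketch for three of the documented variants:

# Per-stage output channels implied by width_multiplier, as computed in RepVGG.__init__.
base = [64, 128, 256, 512]
variants = {
    "repvgg_a0": [0.75, 0.75, 0.75, 2.5],
    "repvgg_b1": [2.0, 2.0, 2.0, 4.0],
    "repvgg_b3": [3.0, 3.0, 3.0, 5.0],
}
for name, mult in variants.items():
    widths = [int(b * m) for b, m in zip(base, mult)]
    print(name, widths)
# repvgg_a0 [48, 96, 192, 1280]
# repvgg_b1 [128, 256, 512, 2048]
# repvgg_b3 [192, 384, 768, 2560]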

res2net

+ + +
+ + + +

+ mindcv.models.res2net.Res2Net + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Res2Net model class, based on +"Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block class of Res2Net, e.g. Bottle2neck.

+
+

+ + TYPE: + Type[nn.Cell] + +

+
layer_nums +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
version +
+

variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'res2net' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width per group of the hidden channels in blocks. Default: 26.

+
+

+ + TYPE: + int + + + DEFAULT: + 26 + +

+
scale +
+

scale factor of Bottle2neck. Default: 4.

+
+

+ + DEFAULT: + 4 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/res2net.py +
142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
class Res2Net(nn.Cell):
+    r"""Res2Net model class, based on
+    `"Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>`_
+
+    Args:
+        block: block class of Res2Net, e.g. Bottle2neck.
+        layer_nums: number of layers of each stage.
+        version: variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width per group of the hidden channels in blocks. Default: 26.
+        scale: scale factor of Bottle2neck. Default: 4.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layer_nums: List[int],
+        version: str = "res2net",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 26,
+        scale=4,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        assert version in ["res2net", "res2net_v1b"]
+        self.version = version
+
+        if norm is None:
+            norm = nn.BatchNorm2d
+        self.norm = norm
+
+        self.num_classes = num_classes
+        self.input_channels = 64
+        self.groups = groups
+        self.base_width = base_width
+        self.scale = scale
+        if self.version == "res2net":
+            self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                                   stride=2, padding=3, pad_mode="pad")
+        elif self.version == "res2net_v1b":
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(in_channels, self.input_channels // 2, kernel_size=3,
+                          stride=2, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels // 2, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+            ])
+
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.max_pool = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2)
+        ])
+        self.layer1 = self._make_layer(block, 64, layer_nums[0])
+        self.layer2 = self._make_layer(block, 128, layer_nums[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layer_nums[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layer_nums[3], stride=2)
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[nn.Cell],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            if stride == 1 or self.version == "res2net":
+                down_sample = nn.SequentialCell([
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                    self.norm(channels * block.expansion)
+                ])
+            else:
+                down_sample = nn.SequentialCell([
+                    nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="same"),
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=1),
+                    self.norm(channels * block.expansion)
+                ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_width,
+                scale=self.scale,
+                stype="stage",
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    scale=self.scale,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
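A minimal sketch of the version switch (illustrative only; Bottle2neck is the block the factories below pass in): "res2net" uses the single 7x7 stem convolution, while "res2net_v1b" uses the three-layer 3x3 deep stem and avg-pool downsampling in the projection shortcut.

import numpy as np
import mindspore as ms
from mindcv.models.res2net import Res2Net, Bottle2neck

net = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net_v1b", num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)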

+mindcv.models.res2net.res2net101(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers Res2Net model. +Refer to the base class models.Res2Net for more details.

+ +
+ Source code in mindcv/models/res2net.py +
326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
@register_model
+def res2net101(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 101 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net101"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net101_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/res2net.py +
366
+367
+368
+369
+370
+371
+372
+373
+374
+375
@register_model
+def res2net101_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net101_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net152(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 152 layers Res2Net model. +Refer to the base class models.Res2Net for more details.

+ +
+ Source code in mindcv/models/res2net.py +
340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
@register_model
+def res2net152(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 152 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net152"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net152_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/res2net.py +
378
+379
+380
+381
+382
+383
+384
+385
+386
+387
@register_model
+def res2net152_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net152_v1b"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net50(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers Res2Net model. +Refer to the base class models.Res2Net for more details.

+ +
+ Source code in mindcv/models/res2net.py +
312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
@register_model
+def res2net50(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 50 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net50"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.res2net.res2net50_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/res2net.py +
354
+355
+356
+357
+358
+359
+360
+361
+362
+363
@register_model
+def res2net50_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net50_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
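Note that these factories default to num_classes=1001 rather than 1000. A brief illustrative sketch overriding the head size:

import numpy as np
import mindspore as ms
from mindcv.models.res2net import res2net50

# Pass num_classes explicitly to get a 1000-way classifier instead of the 1001-way default.
net = res2net50(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)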

resnest

+ + +
+ + + +

+ mindcv.models.resnest.ResNeSt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ResNeSt model class, based on +"ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

Class for the residual block. Option is Bottleneck.

+
+

+ + TYPE: + Type[Bottleneck] + +

+
layers +
+

Numbers of layers in each block.

+
+

+ + TYPE: + List[int] + +

+
radix +
+

Number of groups for Split-Attention conv. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
group +
+

Number of groups for the conv in each bottleneck block. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
bottleneck_width +
+

bottleneck channels factor. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
num_classes +
+

Number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
dilated +
+

Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model, + typically used in Semantic Segmentation. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
dilation +
+

Number of dilation in the conv. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
deep_stem +
+

use a deep stem of three 3x3 convolution layers with widths stem_width, stem_width, + stem_width * 2 instead of a single 7x7 convolution. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
stem_width +
+

number of channels in stem convolutions. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
avg_down +
+

use avg pooling for projection skip connection between stages/downsample. + Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
avd +
+

use avg pooling for downsampling inside the bottleneck block. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
avd_first +
+

apply the avg pooling before the split-attention conv instead of after it. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
norm_layer +
+

Normalization layer used in backbone network. Default: nn.BatchNorm2d.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.BatchNorm2d + +

+
+ +
+ Source code in mindcv/models/resnest.py +
225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
class ResNeSt(nn.Cell):
+    r"""ResNeSt model class, based on
+    `"ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>`_
+
+    Args:
+        block: Class for the residual block. Option is Bottleneck.
+        layers: Numbers of layers in each block.
+        radix: Number of groups for Split-Attention conv. Default: 1.
+        group: Number of groups for the conv in each bottleneck block. Default: 1.
+        bottleneck_width: bottleneck channels factor. Default: 64.
+        num_classes: Number of classification classes. Default: 1000.
+        dilated: Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model,
+                 typically used in Semantic Segmentation. Default: False.
+        dilation: Number of dilation in the conv. Default: 1.
+        deep_stem: use a deep stem of three 3x3 convolution layers with widths stem_width, stem_width,
+                   stem_width * 2 instead of a single 7x7 convolution. Default: False.
+        stem_width: number of channels in stem convolutions. Default: 64.
+        avg_down: use avg pooling for projection skip connection between stages/downsample.
+                  Default: False.
+        avd: use avg pooling for downsampling inside the bottleneck block. Default: False.
+        avd_first: apply the avg pooling before the split-attention conv instead of after it. Default: False.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        norm_layer: Normalization layer used in backbone network. Default: nn.BatchNorm2d.
+    """
+
+    def __init__(
+        self,
+        block: Type[Bottleneck],
+        layers: List[int],
+        radix: int = 1,
+        group: int = 1,
+        bottleneck_width: int = 64,
+        num_classes: int = 1000,
+        dilated: bool = False,
+        dilation: int = 1,
+        deep_stem: bool = False,
+        stem_width: int = 64,
+        avg_down: bool = False,
+        avd: bool = False,
+        avd_first: bool = False,
+        drop_rate: float = 0.0,
+        norm_layer: nn.Cell = nn.BatchNorm2d,
+    ) -> None:
+        super(ResNeSt, self).__init__()
+        self.cardinality = group
+        self.bottleneck_width = bottleneck_width
+        # ResNet-D params
+        self.inplanes = stem_width * 2 if deep_stem else 64
+        self.avg_down = avg_down
+        # ResNeSt params
+        self.radix = radix
+        self.avd = avd
+        self.avd_first = avd_first
+
+        if deep_stem:
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(3, stem_width, kernel_size=3, stride=2, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+            ])
+        else:
+            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode="pad", padding=3,
+                                   has_bias=False)
+
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.inplanes, reduction=2, name="relu")]
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name='layer1'))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name='layer2'))
+
+        if dilated or dilation == 4:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=8, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=8, name='layer4'))
+        elif dilation == 2:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=16, name='layer4'))
+        else:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name='layer4'))
+
+        self.avgpool = GlobalAvgPooling()
+        self.drop = Dropout(p=drop_rate) if drop_rate > 0.0 else None
+        self.fc = nn.Dense(512 * block.expansion, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeUniform(mode="fan_in", nonlinearity="sigmoid"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Bottleneck],
+        planes: int,
+        blocks: int,
+        stride: int = 1,
+        dilation: int = 1,
+        norm_layer: Optional[nn.Cell] = None,
+        is_first: bool = True,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            down_layers = []
+            if self.avg_down:
+                if dilation == 1:
+                    down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="valid"))
+                else:
+                    down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, pad_mode="valid"))
+
+                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1,
+                                             stride=1, has_bias=False))
+            else:
+                down_layers.append(
+                    nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,
+                              has_bias=False))
+            down_layers.append(norm_layer(planes * block.expansion))
+            downsample = nn.SequentialCell(down_layers)
+
+        layers = []
+        if dilation == 1 or dilation == 2:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=1,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        elif dilation == 4:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=2,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        else:
+            raise ValueError(f"Unsupported model type {dilation}")
+
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=dilation,
+                    norm_layer=norm_layer,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        if self.drop:
+            x = self.drop(x)
+        x = self.fc(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +
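The dilated/dilation options above replace stride with dilation in the last stages, keeping the output at 1/8 resolution (see the reduction values recorded in feature_info). A minimal illustrative sketch comparing final feature-map sizes, using the resnest50-style configuration:

import numpy as np
import mindspore as ms
from mindcv.models.resnest import ResNeSt, Bottleneck

cfg = dict(block=Bottleneck, layers=[3, 4, 6, 3], radix=2, group=1,
           bottleneck_width=64, deep_stem=True, stem_width=32,
           avg_down=True, avd=True, avd_first=False)

standard = ResNeSt(**cfg)               # layer4 reduction: 32
dilated = ResNeSt(dilated=True, **cfg)  # layer3/layer4 keep stride 1, reduction stays at 8

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(standard.forward_features(x).shape[2:])  # expected: (7, 7)
print(dilated.forward_features(x).shape[2:])   # expected: (28, 28)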

+mindcv.models.resnest.resnest101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
494
+495
+496
+497
+498
+499
+500
+501
@register_model
+def resnest101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest14(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
464
+465
+466
+467
+468
+469
+470
+471
@register_model
+def resnest14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest14"]
+    model_args = dict(block=Bottleneck, layers=[1, 1, 1, 1], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest200(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
@register_model
+def resnest200(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest200"]
+    model_args = dict(block=Bottleneck, layers=[3, 24, 36, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest26(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
@register_model
+def resnest26(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest26"]
+    model_args = dict(block=Bottleneck, layers=[2, 2, 2, 2], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest269(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
@register_model
+def resnest269(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest269"]
+    model_args = dict(block=Bottleneck, layers=[3, 30, 48, 8], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnest.resnest50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnest.py +
@register_model
+def resnest50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
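The ResNeSt builders above differ only in their block layout and stem width before delegating to _create_resnest. A minimal usage sketch follows; it is not part of the rendered API, and the 1x3x224x224 input shape is an illustrative assumption:

import numpy as np
import mindspore as ms
from mindcv.models.resnest import resnest50

# Build ResNeSt-50 with a randomly initialized 1000-class head.
net = resnest50(pretrained=False, num_classes=1000)

# Dummy NCHW batch; construct() runs forward_features followed by forward_head.
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = net(x)
print(logits.shape)  # expected: (1, 1000)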

resnet

+ + +
+ + + +

+ mindcv.models.resnet.ResNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ResNet model class, based on "Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block of resnet.

+
+

+ + TYPE: + Type[Union[BasicBlock, Bottleneck]] + +

+
layers +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width of per-group hidden channels in blocks. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/resnet.py +
class ResNet(nn.Cell):
+    r"""ResNet model class, based on
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+    Args:
+        block: block of resnet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        if norm is None:
+            norm = nn.BatchNorm2d
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64
+        self.groups = groups
+        self.base_with = base_width
+
+        self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.input_channels, reduction=2, name="relu")]
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name="layer1"))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name="layer2"))
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name="layer3"))
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name="layer4"))
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.resnet.ResNet.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/resnet.py +
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.max_pool(x)
+
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x
+
+
+
+ +
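Because construct is just forward_features followed by forward_head, the backbone can be used as a feature extractor without touching the classifier. A hedged sketch (the 224x224 input and the printed shapes are illustrative, not part of the generated docs):

import numpy as np
import mindspore as ms
from mindcv.models.resnet import resnet50

net = resnet50(pretrained=False)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)

feats = net.forward_features(x)   # stride-32 output of layer4
logits = net.forward_head(feats)  # GlobalAvgPooling + Dense classifier
print(feats.shape, logits.shape)  # e.g. (1, 2048, 7, 7) and (1, 1000)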
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNet model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 152 layers ResNet model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 152 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet152"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 18 layers ResNet model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 18 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet18"]
+    model_args = dict(block=BasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 34 layers ResNet model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 34 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet34"]
+    model_args = dict(block=BasicBlock, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNet model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext101_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNeXt model with 64 groups of GPConv. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnext101_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 64 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext152_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnext152_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnext152_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnet.resnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnet.py +
@register_model
+def resnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext50_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
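Each builder above only fixes block, layers and, for the ResNeXt variants, groups/base_width before handing everything to _create_resnet, so extra keyword arguments flow straight into the ResNet constructor. A sketch under that assumption (the 10-class head is an illustrative choice):

from mindcv.models.resnet import resnet50, resnext50_32x4d

# ResNet-50 with a 10-class head, e.g. for fine-tuning on a small dataset.
net_a = resnet50(pretrained=False, num_classes=10)

# ResNeXt-50: same [3, 4, 6, 3] layout, 32 groups of width 4 per bottleneck.
net_b = resnext50_32x4d(pretrained=False, num_classes=10)

print(net_a.num_features, net_b.num_features)  # both 512 * Bottleneck.expansion = 2048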

resnetv2

+ + + +
+ + + +

+mindcv.models.resnetv2.resnetv2_101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 101 layers ResNetV2 model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnetv2.py +
@register_model
+def resnetv2_101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnetv2_101"]
+    model = ResNet(PreActBottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.resnetv2.resnetv2_50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers ResNetV2 model. Refer to the base class models.ResNet for more details.

+ +
+ Source code in mindcv/models/resnetv2.py +
@register_model
+def resnetv2_50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs['resnetv2_50']
+    model = ResNet(PreActBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
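As the two listings show, the ResNetV2 builders reuse the ResNet body with PreActBottleneck blocks and only call load_pretrained when pretrained=True. A minimal sketch; the checkpoint URL comes from default_cfgs and is not reproduced here:

from mindcv.models.resnetv2 import resnetv2_50

# Randomly initialized pre-activation ResNet-50.
net = resnetv2_50(pretrained=False, num_classes=1000)

# With pretrained=True the weights referenced by default_cfgs["resnetv2_50"]
# would be loaded instead:
# net = resnetv2_50(pretrained=True)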

rexnet

+ + +
+ + + +

+ mindcv.models.rexnet.ReXNetV1 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ReXNet model class, based on "Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of the input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
fi_channels +
+

number of the final channels. Default: 180.

+
+

+ + TYPE: + int + + + DEFAULT: + 180 + +

+
initial_channels +
+

initialize inplanes. Default: 16.

+
+

+ + TYPE: + int + + + DEFAULT: + 16 + +

+
width_mult +
+

The ratio of the channel. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
depth_mult +
+

The ratio of num_layers. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int) + + + DEFAULT: + 1000 + +

+
use_se +
+

use SENet in LinearBottleneck. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
se_ratio +
+

SENet reduction ratio. Default: 1/12.

+
+

+ + DEFAULT: + 1 / 12 + +

+
drop_rate +
+

dropout ratio. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
ch_div +
+

divisible by ch_div. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
act_layer +
+

activation function in ConvNormAct. Default: nn.SiLU.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.SiLU + +

+
dw_act_layer +
+

activation function after dw_conv. Default: nn.ReLU6.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.ReLU6 + +

+
cls_useconv +
+

use conv in classification. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/rexnet.py +
class ReXNetV1(nn.Cell):
+    r"""ReXNet model class, based on
+    `"Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>`_
+
+    Args:
+        in_channels (int): number of the input channels. Default: 3.
+        fi_channels (int): number of the final channels. Default: 180.
+        initial_channels (int): initialize inplanes. Default: 16.
+        width_mult (float): The ratio of the channel. Default: 1.0.
+        depth_mult (float): The ratio of num_layers. Default: 1.0.
+        num_classes (int): number of classification classes. Default: 1000.
+        use_se (bool): use SENet in LinearBottleneck. Default: True.
+        se_ratio (float): SENet reduction ratio. Default: 1/12.
+        drop_rate (float): dropout ratio. Default: 0.2.
+        ch_div (int): divisible by ch_div. Default: 1.
+        act_layer (nn.Cell): activation function in ConvNormAct. Default: nn.SiLU.
+        dw_act_layer (nn.Cell): activation function after dw_conv. Default: nn.ReLU6.
+        cls_useconv (bool): use conv in classification. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels=3,
+        fi_channels=180,
+        initial_channels=16,
+        width_mult=1.0,
+        depth_mult=1.0,
+        num_classes=1000,
+        use_se=True,
+        se_ratio=1 / 12,
+        drop_rate=0.2,
+        drop_path_rate=0.0,
+        ch_div=1,
+        act_layer=nn.SiLU,
+        dw_act_layer=nn.ReLU6,
+        cls_useconv=False,
+    ):
+        super(ReXNetV1, self).__init__()
+
+        layers = [1, 2, 2, 3, 3, 5]
+        strides = [1, 2, 2, 2, 1, 2]
+        use_ses = [False, False, True, True, True, True]
+
+        layers = [ceil(element * depth_mult) for element in layers]
+        strides = sum([[element] + [1] * (layers[idx] - 1)
+                       for idx, element in enumerate(strides)], [])
+        if use_se:
+            use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
+        else:
+            use_ses = [False] * sum(layers[:])
+        exp_ratios = [1] * layers[0] + [6] * sum(layers[1:])
+
+        self.depth = sum(layers[:]) * 3
+        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
+        inplanes = initial_channels / width_mult if width_mult < 1.0 else initial_channels
+
+        features = []
+        in_channels_group = []
+        out_channels_group = []
+
+        for i in range(self.depth // 3):
+            if i == 0:
+                in_channels_group.append(int(round(stem_channel * width_mult)))
+                out_channels_group.append(int(round(inplanes * width_mult)))
+            else:
+                in_channels_group.append(int(round(inplanes * width_mult)))
+                inplanes += fi_channels / (self.depth // 3 * 1.0)
+                out_channels_group.append(int(round(inplanes * width_mult)))
+
+        stem_chs = make_divisible(round(stem_channel * width_mult), divisor=ch_div)
+        self.stem = Conv2dNormActivation(in_channels, stem_chs, stride=2, padding=1, activation=act_layer)
+
+        feat_chs = [stem_chs]
+        self.feature_info = []
+        curr_stride = 2
+        features = []
+        num_blocks = len(in_channels_group)
+        for block_idx, (in_c, out_c, exp_ratio, stride, use_se) in enumerate(
+            zip(in_channels_group, out_channels_group, exp_ratios, strides, use_ses)
+        ):
+            if stride > 1:
+                fname = "stem" if block_idx == 0 else f"features.{block_idx - 1}"
+                self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=fname)]
+            block_dpr = drop_path_rate * block_idx / (num_blocks - 1)  # stochastic depth linear decay rule
+            drop_path = DropPath(block_dpr) if block_dpr > 0. else None
+            features.append(LinearBottleneck(in_channels=in_c,
+                                             out_channels=out_c,
+                                             exp_ratio=exp_ratio,
+                                             stride=stride,
+                                             use_se=use_se,
+                                             se_ratio=se_ratio,
+                                             act_layer=act_layer,
+                                             dw_act_layer=dw_act_layer,
+                                             drop_path=drop_path))
+            curr_stride *= stride
+            feat_chs.append(out_c)
+
+        pen_channels = make_divisible(int(1280 * width_mult), divisor=ch_div)
+        self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=f'features.{len(features) - 1}')]
+        self.flatten_sequential = True
+        features.append(Conv2dNormActivation(out_channels_group[-1],
+                                             pen_channels,
+                                             kernel_size=1,
+                                             activation=act_layer))
+
+        features.append(GlobalAvgPooling(keep_dims=True))
+        self.useconv = cls_useconv
+        self.features = nn.SequentialCell(*features)
+        if self.useconv:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Conv2d(pen_channels, num_classes, 1, has_bias=True))
+        else:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Dense(pen_channels, num_classes))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Conv2d, nn.Dense)):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         [1, cell.bias.shape[0]], cell.bias.dtype).reshape((-1)))
+
+    def forward_features(self, x):
+        x = self.stem(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x):
+        if not self.useconv:
+            x = x.reshape((x.shape[0], -1))
+            x = self.cls(x)
+        else:
+            x = self.cls(x).reshape((x.shape[0], -1))
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
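The stochastic-depth schedule in the constructor is a plain linear ramp: block block_idx receives drop probability drop_path_rate * block_idx / (num_blocks - 1), so the first block is never dropped and the last one uses the full rate. A small standalone sketch of the same rule (the rate value is illustrative):

drop_path_rate = 0.1
num_blocks = 16  # sum of layers [1, 2, 2, 3, 3, 5] with depth_mult=1.0
per_block = [drop_path_rate * i / (num_blocks - 1) for i in range(num_blocks)]
print(per_block[0], per_block[-1])  # 0.0 and 0.1, increasing linearly in between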
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_09(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 0.9. Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py +
@register_model
+def rexnet_09(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 0.9.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_09", 0.9, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.0. Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py +
@register_model
+def rexnet_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_10", 1.0, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_13(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.3. Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py +
@register_model
+def rexnet_13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.3.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_13", 1.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 1.5. Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py +
@register_model
+def rexnet_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.5.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_15", 1.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.rexnet.rexnet_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ReXNet model with width multiplier of 2.0. Refer to the base class models.ReXNetV1 for more details.

+ +
+ Source code in mindcv/models/rexnet.py +
@register_model
+def rexnet_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 2.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_20", 2.0, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
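All rexnet_* entry points above forward a fixed width multiplier to the internal _rexnet helper and are otherwise identical. A usage sketch (the input shape is an illustrative assumption):

import numpy as np
import mindspore as ms
from mindcv.models.rexnet import rexnet_10

net = rexnet_10(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(net(x).shape)  # (2, 1000)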

senet

+ + +
+ + + +

+ mindcv.models.senet.SENet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SENet model class, based on "Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block class of SENet.

+
+

+ + TYPE: + Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]] + +

+
layers +
+

Number of residual blocks for 4 layers.

+
+

+ + TYPE: + List[int] + +

+
group +
+

Number of groups for the conv in each bottleneck block.

+
+

+ + TYPE: + int + +

+
reduction +
+

Reduction ratio for Squeeze-and-Excitation modules.

+
+

+ + TYPE: + int + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
inplanes +
+

Number of input channels for layer1. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
input3x3 +
+

If True, use three 3x3 convolutions in layer0. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
downsample_kernel_size +
+

Kernel size for downsampling convolutions. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
downsample_padding +
+

Padding for downsampling convolutions. Default: 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/models/senet.py +
class SENet(nn.Cell):
+    r"""SENet model class, based on
+    `"Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>`_
+
+    Args:
+        block: block class of SENet.
+        layers: Number of residual blocks for 4 layers.
+        group: Number of groups for the conv in each bottleneck block.
+        reduction: Reduction ratio for Squeeze-and-Excitation modules.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        in_channels: number of channels of the input. Default: 3.
+        inplanes:  Number of input channels for layer1. Default: 64.
+        input3x3: If `True`, use three 3x3 convolutions in layer0. Default: False.
+        downsample_kernel_size: Kernel size for downsampling convolutions. Default: 1.
+        downsample_padding: Padding for downsampling convolutions. Default: 0.
+        num_classes (int): number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        layers: List[int],
+        group: int,
+        reduction: int,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        inplanes: int = 64,
+        input3x3: bool = False,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+        num_classes: int = 1000,
+    ) -> None:
+        super(SENet, self).__init__()
+        self.inplanes = inplanes
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        if input3x3:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, 3, stride=2, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, 64, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, inplanes, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        else:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, inplanes, kernel_size=7, stride=2, pad_mode="pad",
+                          padding=3, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        self.pool0 = nn.MaxPool2d(3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], group=group,
+                                       reduction=reduction, downsample_kernel_size=1,
+                                       downsample_padding=0)
+
+        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.num_features = 512 * block.expansion
+
+        self.pool = GlobalAvgPooling()
+        if self.drop_rate > 0.:
+            self.dropout = Dropout(p=self.drop_rate)
+        self.classifier = nn.Dense(self.num_features, self.num_classes)
+
+        self._initialize_weights()
+
+    def _make_layer(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        planes: int,
+        blocks: int,
+        group: int,
+        reduction: int,
+        stride: int = 1,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.SequentialCell([
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size,
+                          stride=stride, pad_mode="pad", padding=downsample_padding, has_bias=False),
+                nn.BatchNorm2d(planes * block.expansion)
+            ])
+
+        layers = [block(self.inplanes, planes, group, reduction, stride, downsample)]
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes, group, reduction))
+
+        return nn.SequentialCell(layers)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_in", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.layer0(x)
+        x = self.pool0(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
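Note that the Dropout layer is only created when drop_rate > 0 and is applied between global pooling and the classifier. A hedged sketch that builds the same configuration as the seresnet50 builder below but with dropout enabled (constructing SENet directly and importing the block class like this is an illustration, not the documented entry point):

from mindcv.models.senet import SENet, SEResNetBottleneck

# SE-ResNet-50 body with a 30% classifier dropout; mirrors seresnet50 except drop_rate.
net = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,
            drop_rate=0.3, num_classes=1000)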
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.senet.senet154(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def senet154(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["senet154"]
+    model = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], group=64, reduction=16,
+                  downsample_kernel_size=3, downsample_padding=1,  inplanes=128, input3x3=True,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet101"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet152"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet18"]
+    model = SENet(block=SEResNetBlock, layers=[2, 2, 2, 2], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet34"]
+    model = SENet(block=SEResNetBlock, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet50"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext101_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext26_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnext26_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext26_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[2, 2, 2, 2], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py +
@register_model
+def seresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext50_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
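The builders above only vary the block type, layer counts and group width; senet154 additionally enables the three-3x3 stem (input3x3=True) and 3x3 downsample convolutions. A short usage sketch (the keyword values are illustrative and simply pass through **kwargs to SENet):

import numpy as np
import mindspore as ms
from mindcv.models.senet import seresnext50_32x4d

net = seresnext50_32x4d(pretrained=False, num_classes=100, drop_rate=0.2)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)
print(net(x).shape)  # (1, 100)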

shufflenetv1

+ + +
+ + + +

+ mindcv.models.shufflenetv1.ShuffleNetV1 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV1 model class, based on "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" <https://arxiv.org/abs/1707.01083>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '2.0x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '2.0x' + +

+
group +
+

number of groups for group convolution. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/shufflenetv1.py +
class ShuffleNetV1(nn.Cell):
+    r"""ShuffleNetV1 model class, based on
+    `"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" <https://arxiv.org/abs/1707.01083>`_  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '2.0x'.
+        group: number of groups for group convolution. Default: 3.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "2.0x",
+        group: int = 3,
+    ):
+        super().__init__()
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if group == 3:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 12, 120, 240, 480]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 240, 480, 960]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 360, 720, 1440]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 480, 960, 1920]
+            else:
+                raise NotImplementedError
+        elif group == 8:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 16, 192, 384, 768]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 384, 768, 1536]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
+            else:
+                raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell(
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        )
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                stride = 2 if i == 0 else 1
+                first_group = idxstage == 0 and i == 0
+                features.append(ShuffleV1Block(input_channel, output_channel,
+                                               group=group, first_group=first_group,
+                                               mid_channels=output_channel // 4, stride=stride))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(features)
+        self.global_pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
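The stage_out_channels table above is chosen purely from the (group, model_size) pair, and an unsupported size raises NotImplementedError. A small sketch that instantiates two supported combinations directly (using the class itself here is illustrative; the registered builders below are the usual entry points):

from mindcv.models.shufflenetv1 import ShuffleNetV1

net_g3 = ShuffleNetV1(model_size="1.0x", group=3)  # final stage width 960
net_g8 = ShuffleNetV1(model_size="1.0x", group=8)  # final stage width 1536
print(net_g3.stage_out_channels[-1], net_g8.stage_out_channels[-1])  # 960 1536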
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py +
@register_model
+def shufflenet_v1_g3_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_05"]
+    model = ShuffleNetV1(group=3, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py +
@register_model
+def shufflenet_v1_g3_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_10"]
+    model = ShuffleNetV1(group=3, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py +
@register_model
+def shufflenet_v1_g3_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_15"]
+    model = ShuffleNetV1(group=3, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ Source code in mindcv/models/shufflenetv1.py, lines 268-279
@register_model
+def shufflenet_v1_g3_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_20"]
+    model = ShuffleNetV1(group=3, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ Source code in mindcv/models/shufflenetv1.py, lines 282-293
@register_model
+def shufflenet_v1_g8_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_05"]
+    model = ShuffleNetV1(group=8, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ Source code in mindcv/models/shufflenetv1.py, lines 296-307
@register_model
+def shufflenet_v1_g8_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_10"]
+    model = ShuffleNetV1(group=8, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ Source code in mindcv/models/shufflenetv1.py, lines 310-321
@register_model
+def shufflenet_v1_g8_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_15"]
+    model = ShuffleNetV1(group=8, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv. +Refer to the base class models.ShuffleNetV1 for more details.

+ Source code in mindcv/models/shufflenetv1.py, lines 324-335
@register_model
+def shufflenet_v1_g8_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_20"]
+    model = ShuffleNetV1(group=8, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

shufflenetv2

+ + +
+ + + +

+ mindcv.models.shufflenetv2.ShuffleNetV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV2 model class, based on +"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/abs/1807.11164>_

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '1.5x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1.5x' + +

+
+ Source code in mindcv/models/shufflenetv2.py, lines 117-217
class ShuffleNetV2(nn.Cell):
+    r"""ShuffleNetV2 model class, based on
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/abs/1807.11164>`_
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '1.5x'.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "1.5x",
+    ):
+        super().__init__()
+
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if model_size == "0.5x":
+            self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif model_size == "1.0x":
+            self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif model_size == "1.5x":
+            self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif model_size == "2.0x":
+            self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
+        else:
+            raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2,
+                      pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        ])
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                if i == 0:
+                    self.features.append(ShuffleV2Block(input_channel, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=2))
+                else:
+                    self.features.append(ShuffleV2Block(input_channel // 2, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=1))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(self.features)
+
+        self.conv_last = nn.SequentialCell([
+            nn.Conv2d(input_channel, self.stage_out_channels[-1], kernel_size=1, stride=1),
+            nn.BatchNorm2d(self.stage_out_channels[-1]),
+            nn.ReLU()
+        ])
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.conv_last(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
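Because the class splits inference into forward_features and forward_head, the backbone can be queried on its own. The sketch below is illustrative and uses the default 1.5x configuration with a random input.

import numpy as np
import mindspore as ms
from mindcv.models.shufflenetv2 import ShuffleNetV2

net = ShuffleNetV2(model_size="1.5x", num_classes=1000, in_channels=3)
net.set_train(False)

x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
feats = net.forward_features(x)   # feature map before conv_last / pooling / classifier
logits = net.forward_head(feats)  # conv_last -> global average pooling -> Dense
print(feats.shape, logits.shape)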
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x0_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 0.5. +Refer to the base class models.ShuffleNetV2 for more details.

+ Source code in mindcv/models/shufflenetv2.py, lines 220-231
@register_model
+def shufflenet_v2_x0_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 0.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x0_5"]
+    model = ShuffleNetV2(model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.0. +Refer to the base class models.ShuffleNetV2 for more details.

+ Source code in mindcv/models/shufflenetv2.py, lines 234-245
@register_model
+def shufflenet_v2_x1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_0"]
+    model = ShuffleNetV2(model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.5. +Refer to the base class models.ShuffleNetV2 for more details.

+ Source code in mindcv/models/shufflenetv2.py, lines 248-259
@register_model
+def shufflenet_v2_x1_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_5"]
+    model = ShuffleNetV2(model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x2_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 2.0. +Refer to the base class models.ShuffleNetV2 for more details.

+ Source code in mindcv/models/shufflenetv2.py, lines 262-273
@register_model
+def shufflenet_v2_x2_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 2.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x2_0"]
+    model = ShuffleNetV2(model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
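The four factories above differ only in the model_size they pass to ShuffleNetV2. A quick, illustrative way to compare them is to count trainable parameters; the snippet below is a sketch that only assumes the factories documented in this section.

import numpy as np
from mindcv.models.shufflenetv2 import (
    shufflenet_v2_x0_5, shufflenet_v2_x1_0, shufflenet_v2_x1_5, shufflenet_v2_x2_0,
)

for factory in (shufflenet_v2_x0_5, shufflenet_v2_x1_0, shufflenet_v2_x1_5, shufflenet_v2_x2_0):
    net = factory(pretrained=False)
    # Parameter count derived from trainable parameter shapes.
    n_params = sum(int(np.prod(p.shape)) for p in net.trainable_params())
    print(f"{factory.__name__}: {n_params / 1e6:.2f}M parameters")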
+
+
+ +

sknet

+ + +
+ + + +

+ mindcv.models.sknet.SKNet + + +

+ + +
+

+ Bases: ResNet

+ + +

SKNet model class, based on +"Selective Kernel Networks" <https://arxiv.org/abs/1903.06586>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block of sknet.

+
+

+ + TYPE: + Type[nn.Cell] + +

+
layers +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width of per-group hidden channels in blocks. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
sk_kwargs +
+

kwargs of selective kernel. Default: None.

+
+

+ + TYPE: + Optional[Dict] + + + DEFAULT: + None + +

+
+ Source code in mindcv/models/sknet.py, lines 144-215
class SKNet(ResNet):
+    r"""SKNet model class, based on
+    `"Selective Kernel Networks" <https://arxiv.org/abs/1903.06586>`_
+
+    Args:
+        block: block of sknet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+        sk_kwargs: kwargs of selective kernel. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+        sk_kwargs: Optional[Dict] = None,
+    ) -> None:
+        self.sk_kwargs: Optional[Dict] = sk_kwargs  # make pylint happy
+        super().__init__(block, layers, num_classes, in_channels, groups, base_width, norm)
+
+    def _make_layer(
+        self,
+        block: Type[Union[SelectiveKernelBasic, SelectiveKernelBottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+                sk_kwargs=self.sk_kwargs,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm,
+                    sk_kwargs=self.sk_kwargs,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
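For illustration only: SKNet can also be constructed directly with a custom sk_kwargs dict, mirroring what the skresnet* factories below do. The rd_ratio/rd_divisor/split_input keys are taken from those factories; any other values are assumptions for the sketch.

from mindcv.models.sknet import SKNet, SelectiveKernelBasic

# Same layout as skresnet18, with a hand-picked selective-kernel configuration.
sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
net = SKNet(
    SelectiveKernelBasic,   # block type
    [2, 2, 2, 2],           # blocks per stage (ResNet-18 layout)
    num_classes=1000,
    in_channels=3,
    sk_kwargs=sk_kwargs,    # forwarded to every block via _make_layer
)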
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 18 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ Source code in mindcv/models/sknet.py, lines 218-231
@register_model
+def skresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 18 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet18"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
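The num_classes and in_channels arguments can be overridden when the classifier head needs to match a different dataset. A hypothetical 10-class setup might look like this (sketch only, random input for shape checking):

import numpy as np
import mindspore as ms
from mindcv.models.sknet import skresnet18

# 10-way head instead of the ImageNet default of 1000 classes.
net = skresnet18(pretrained=False, num_classes=10, in_channels=3)
net.set_train(False)
out = net(ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32))
print(out.shape)  # expected: (1, 10)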
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 34 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ Source code in mindcv/models/sknet.py, lines 234-247
@register_model
+def skresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 34 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet34"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers SKNet model. +Refer to the base class models.SKNet for more details.

+ Source code in mindcv/models/sknet.py, lines 250-263
@register_model
+def skresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet50"]
+    sk_kwargs = dict(split_input=True)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 50 layers SKNeXt model with 32 groups of GPConv. +Refer to the base class models.SKNet for more details.

+ Source code in mindcv/models/sknet.py, lines 266-279
@register_model
+def skresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnext50_32x4d"]
+    sk_kwargs = dict(rd_ratio=1 / 16, rd_divisor=32, split_input=False)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

squeezenet

+ + +
+ + + +

+ mindcv.models.squeezenet.SqueezeNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeNet model class, based on +"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" <https://arxiv.org/abs/1602.07360>_ # noqa: E501

+

.. note:: Important: In contrast to the other models, SqueezeNet expects tensors with a size of N x 3 x 227 x 227, so ensure your images are sized accordingly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
version +
+

version of the architecture, '1_0' or '1_1'. Default: '1_0'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1_0' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
drop_rate +
+

dropout rate of the classifier. Default: 0.5.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ Source code in mindcv/models/squeezenet.py, lines 61-150
class SqueezeNet(nn.Cell):
+    r"""SqueezeNet model class, based on
+    `"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" <https://arxiv.org/abs/1602.07360>`_  # noqa: E501
+
+    .. note::
+        **Important**: In contrast to the other models, SqueezeNet expects tensors with a size of
+        N x 3 x 227 x 227, so ensure your images are sized accordingly.
+
+    Args:
+        version: version of the architecture, '1_0' or '1_1'. Default: '1_0'.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+        in_channels: number of channels of the input. Default: 3.
+    """
+
+    def __init__(
+        self,
+        version: str = "1_0",
+        num_classes: int = 1000,
+        drop_rate: float = 0.5,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+        if version == "1_0":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 96, kernel_size=7, stride=2, pad_mode="valid", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(96, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                Fire(128, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 32, 128, 128),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(512, 64, 256, 256),
+            ])
+        elif version == "1_1":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(64, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(128, 32, 128, 128),
+                Fire(256, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                Fire(512, 64, 256, 256),
+            ])
+        else:
+            raise ValueError(f"Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected")
+
+        self.final_conv = nn.Conv2d(512, num_classes, kernel_size=1, has_bias=True)
+        self.classifier = nn.SequentialCell([
+            Dropout(p=drop_rate),
+            self.final_conv,
+            nn.ReLU(),
+            GlobalAvgPooling()
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if cell is self.final_conv:
+                    cell.weight.set_data(init.initializer(init.Normal(), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
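As the note above says, the reference input size for SqueezeNet is 227 x 227. A quick sanity check (illustrative, random data only):

import numpy as np
import mindspore as ms
from mindcv.models.squeezenet import SqueezeNet

net = SqueezeNet(version="1_0", num_classes=1000, in_channels=3)
net.set_train(False)
x = ms.Tensor(np.random.rand(1, 3, 227, 227), ms.float32)  # 227, not 224
print(net(x).shape)  # expected: (1, 1000)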
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.squeezenet.squeezenet1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.0. +Refer to the base class models.SqueezeNet for more details.

+ Source code in mindcv/models/squeezenet.py, lines 153-164
@register_model
+def squeezenet1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.0.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_0"]
+    model = SqueezeNet(version="1_0", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.squeezenet.squeezenet1_1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.1. +Refer to the base class models.SqueezeNet for more details.

+ Source code in mindcv/models/squeezenet.py, lines 167-178
@register_model
+def squeezenet1_1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.1.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_1"]
+    model = SqueezeNet(version="1_1", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
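Extra keyword arguments to the factory are forwarded to the SqueezeNet constructor via **kwargs, so constructor options such as the classifier dropout rate can be adjusted without building the class directly. A sketch:

from mindcv.models.squeezenet import squeezenet1_1

# drop_rate is not an explicit factory parameter; it is forwarded through **kwargs
# to SqueezeNet(version="1_1", ..., drop_rate=0.3).
net = squeezenet1_1(pretrained=False, drop_rate=0.3)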
+
+
+ +

swintransformer

+ + +
+ + + +

+ mindcv.models.swintransformer.SwinTransformer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformer model class, based on +"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" <https://arxiv.org/pdf/2103.14030>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default 224

+
+

+ + TYPE: + int | tuple(int + + + DEFAULT: + 224 + +

+
patch_size +
+

Patch size. Default: 4

+
+

+ + TYPE: + int | tuple(int + + + DEFAULT: + 4 + +

+
in_chans +
+

Number of input image channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

Number of classes for classification head. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

Patch embedding dimension. Default: 96

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
depths +
+

Depth of each Swin Transformer layer.

+
+

+ + TYPE: + tuple(int + + + DEFAULT: + None + +

+
num_heads +
+

Number of attention heads in different layers.

+
+

+ + TYPE: + tuple(int + + + DEFAULT: + None + +

+
window_size +
+

Window size. Default: 7

+
+

+ + TYPE: + int + + + DEFAULT: + 7 + +

+
mlp_ratio +
+

Ratio of mlp hidden dim to embedding dim. Default: 4

+
+

+ + TYPE: + float + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

If True, add a learnable bias to query, key, value. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
qk_scale +
+

Override default qk scale of head_dim ** -0.5 if set. Default: None

+
+

+ + TYPE: + float + + + DEFAULT: + None + +

+
drop_rate +
+

Dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

Attention dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

Stochastic depth rate. Default: 0.1

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
norm_layer +
+

Normalization layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
ape +
+

If True, add absolute position embedding to the patch embedding. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
patch_norm +
+

If True, add normalization after patch embedding. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ Source code in mindcv/models/swintransformer.py, lines 567-696
class SwinTransformer(nn.Cell):
+    r"""SwinTransformer model class, based on
+    `"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" <https://arxiv.org/pdf/2103.14030>`_
+
+    Args:
+        image_size (int | tuple(int)): Input image size. Default 224
+        patch_size (int | tuple(int)): Patch size. Default: 4
+        in_chans (int): Number of input image channels. Default: 3
+        num_classes (int): Number of classes for classification head. Default: 1000
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Cell): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: Optional[List[int]] = None,
+        num_heads: Optional[List[int]] = None,
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        qk_scale: Optional[int] = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: Optional[nn.Cell] = nn.LayerNorm,
+        ape: bool = False,
+        patch_norm: bool = True,
+    ) -> None:
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = Parameter(Tensor(np.zeros((1, num_patches, embed_dim)), dtype=mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
+                               input_resolution=(patches_resolution[0] // (2 ** i_layer),
+                                                 patches_resolution[1] // (2 ** i_layer)),
+                               depth=depths[i_layer],
+                               num_heads=num_heads[i_layer],
+                               window_size=window_size,
+                               mlp_ratio=self.mlp_ratio,
+                               qkv_bias=qkv_bias, qk_scale=qk_scale,
+                               drop=drop_rate, attn_drop=attn_drop_rate,
+                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                               norm_layer=norm_layer,
+                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None)
+            self.layers.append(layer)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-5)
+        self.classifier = nn.Dense(in_channels=self.num_features,
+                                   out_channels=num_classes, has_bias=True) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+
+    def no_weight_decay(self) -> None:
+        return {"absolute_pos_embed"}
+
+    def no_weight_decay_keywords(self) -> None:
+        return {"relative_position_bias_table"}
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = ops.mean(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
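The feature width doubles per stage, so num_features = embed_dim * 2 ** (num_layers - 1); for the swin_tiny configuration below (embed_dim=96, depths=[2, 2, 6, 2]) that gives 96 * 2**3 = 768, which is also the input width of the final Dense classifier. A small sketch of the same arithmetic:

embed_dim = 96
depths = [2, 2, 6, 2]          # swin_tiny configuration from the factory below
num_layers = len(depths)
num_features = int(embed_dim * 2 ** (num_layers - 1))
print(num_features)            # 768 -> in_channels of the classification head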
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.swintransformer.swin_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SwinTransformer tiny model. Refer to the base class models.SwinTransformer for more details.

+ Source code in mindcv/models/swintransformer.py, lines 699-714
@register_model
+def swin_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SwinTransformer:
+    """Get SwinTransformer tiny model.
+    Refer to the base class `models.SwinTransformer` for more details.
+    """
+    default_cfg = default_cfgs["swin_tiny"]
+    model = SwinTransformer(image_size=224, patch_size=4, in_chans=in_channels, num_classes=num_classes,
+                            embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7,
+                            mlp_ratio=4., qkv_bias=True, qk_scale=None,
+                            drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2,
+                            norm_layer=nn.LayerNorm, ape=False, patch_norm=True, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
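Because the factory is decorated with @register_model, it can also be reached through the model registry. The sketch below assumes that mindcv exposes create_model at the package level (as used elsewhere in the MindCV docs) and that "swin_tiny" is the registered name.

import numpy as np
import mindspore as ms
import mindcv

net = mindcv.create_model("swin_tiny", pretrained=False, num_classes=1000)
net.set_train(False)
x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)  # swin_tiny is built for 224 x 224 inputs
print(net(x).shape)  # expected: (1, 1000)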
+
+
+ +

swintransformerv2

+ + +
+ + + +

+ mindcv.models.swintransformerv2.SwinTransformerV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformerV2 model class, based on +"Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default: 256.

+
+

+ + TYPE: + int + + + DEFAULT: + 256 + +

+
patch_size +
+

Patch size. Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
in_channels +
+

Number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

Number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

Patch embedding dimension. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
depths +
+

Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [2, 2, 6, 2] + +

+
num_heads +
+

Number of attention heads in different layers. Default: [3, 6, 12, 24].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [3, 6, 12, 24] + +

+
window_size +
+

Window size. Default: 7.

+
+

+ + TYPE: + int + + + DEFAULT: + 7 + +

+
mlp_ratio +
+

Ratio of mlp hidden dim to embedding dim. Default: 4.

+
+

+ + TYPE: + float + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

If True, add a bias for query, key, value. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

Attention drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

Stochastic depth rate. Default: 0.1.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
norm_layer +
+

Normalization layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
patch_norm +
+

If True, add normalization after patch embedding. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
pretrained_window_sizes +
+

Pretrained window sizes of each layer. Default: [0, 0, 0, 0].

+
+

+ + TYPE: + List[int] + + + DEFAULT: + [0, 0, 0, 0] + +

+
+ Source code in mindcv/models/swintransformerv2.py, lines 521-649
class SwinTransformerV2(nn.Cell):
+    r"""SwinTransformerV2 model class, based on
+    `"Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>`_
+
+    Args:
+        image_size: Input image size. Default: 256.
+        patch_size: Patch size. Default: 4.
+        in_channels: Number of channels of the input. Default: 3.
+        num_classes: Number of classification classes. Default: 1000.
+        embed_dim: Patch embedding dimension. Default: 96.
+        depths: Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].
+        num_heads: Number of attention heads in different layers. Default: [3, 6, 12, 24].
+        window_size: Window size. Default: 7.
+        mlp_ratio: Ratio of mlp hidden dim to embedding dim. Default: 4.
+        qkv_bias: If True, add a bias for query, key, value. Default: True.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        attn_drop_rate: Attention drop probability for the Dropout layer. Default: 0.
+        drop_path_rate: Stochastic depth rate. Default: 0.1.
+        norm_layer: Normalization layer. Default: nn.LayerNorm.
+        patch_norm: If True, add normalization after patch embedding. Default: True.
+        pretrained_window_sizes: Pretrained window sizes of each layer. Default: [0, 0, 0, 0].
+    """
+
+    def __init__(
+        self,
+        image_size: int = 256,
+        patch_size: int = 4,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: List[int] = [2, 2, 6, 2],
+        num_heads: List[int] = [3, 6, 12, 24],
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: nn.Cell = nn.LayerNorm,
+        patch_norm: bool = True,
+        pretrained_window_sizes: List[int] = [0, 0, 0, 0],
+    ) -> None:
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.in_channels = in_channels
+        self.patch_size = patch_size
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        self.num_patches = num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        self.final_seq = num_patches  # downsample seq_length
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(
+                dim=int(embed_dim * 2**i_layer),
+                input_resolution=(patches_resolution[0] // (2**i_layer),
+                                  patches_resolution[1] // (2**i_layer)),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                norm_layer=norm_layer,
+                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
+                pretrained_window_size=pretrained_window_sizes[i_layer]
+            )
+            # downsample seq_length
+            if i_layer < self.num_layers - 1:
+                self.final_seq = self.final_seq // 4
+            self.layers.append(layer)
+        self.head = nn.Dense(self.num_features, self.num_classes)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-6)
+        self.avgpool = ops.ReduceMean(keep_dims=False)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = self.avgpool(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
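Unlike the V1 class above, SwinTransformerV2 defaults to image_size=256, so window partitioning assumes 256 x 256 inputs unless configured otherwise. An illustrative direct construction (the argument values roughly mirror the swinv2_tiny_window8 factory below):

import numpy as np
import mindspore as ms
from mindcv.models.swintransformerv2 import SwinTransformerV2

net = SwinTransformerV2(image_size=256, window_size=8, embed_dim=96,
                        depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24])
net.set_train(False)
x = ms.Tensor(np.random.rand(1, 3, 256, 256), ms.float32)
print(net(x).shape)  # expected: (1, 1000)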
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_base_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 717-727
@register_model
+def swinv2_base_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_base_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 704-714
@register_model
+def swinv2_base_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 691-701
@register_model
+def swinv2_small_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 678-688
@register_model
+def swinv2_small_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 665-675
@register_model
+def swinv2_tiny_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ Source code in mindcv/models/swintransformerv2.py, lines 652-662
@register_model
+def swinv2_tiny_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
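The six swinv2_* factories in this section differ only in window size, embedding width and depth. A sketch for picking one by its registered name; it assumes mindcv.list_models and mindcv.create_model are available at the package level, as in the MindCV quick-start docs.

import mindcv

# Registered SwinTransformerV2 variants documented above.
print([name for name in mindcv.list_models() if name.startswith("swinv2_")])

net = mindcv.create_model("swinv2_small_window8", pretrained=False)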
+
+
+ +

vgg

+ + +
+ + + +

+ mindcv.models.vgg.VGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

VGGNet model class, based on +"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
model_name +
+

name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.

+
+

+ + TYPE: + str + +

+
batch_norm +
+

use batch normalization or not. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

dropout rate of the classifier. Default: 0.5.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
+ Source code in mindcv/models/vgg.py, lines 72-135
class VGG(nn.Cell):
+    r"""VGGNet model class, based on
+    `"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+    Args:
+        model_name: name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
+        batch_norm: use batch normalization or not. Default: False.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        batch_norm: bool = False,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.5,
+    ) -> None:
+        super().__init__()
+        cfg = cfgs[model_name]
+        self.features = _make_layers(cfg, batch_norm=batch_norm, in_channels=in_channels)
+        self.flatten = nn.Flatten()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(512 * 7 * 7, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.flatten(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
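The constructor can also be used directly; a minimal sketch, assuming the module path shown in the heading above (the registered vgg* builders below wrap exactly this call):

>>> from mindcv.models.vgg import VGG
>>> net = VGG(model_name="vgg13", batch_norm=True, num_classes=10, drop_rate=0.3)
>>> # the classifier expects 224x224 inputs, since it flattens 512 * 7 * 7 features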
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg11(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 11 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 138-149)
@register_model
+def vgg11(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 11 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg11"]
+    model = VGG(model_name="vgg11", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg13(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 13 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 152-163)
@register_model
+def vgg13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 13 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg13"]
+    model = VGG(model_name="vgg13", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 16 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 166-177)
@register_model
+def vgg16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 16 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg16"]
+    model = VGG(model_name="vgg16", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg19(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 19 layers VGG model. +Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 180-191)
@register_model
+def vgg19(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 19 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg19"]
+    model = VGG(model_name="vgg19", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
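The four builders above (vgg11/vgg13/vgg16/vgg19) share one signature, and extra keyword arguments such as batch_norm or drop_rate are forwarded to the VGG constructor; a usage sketch, assuming the builders are importable from mindcv.models:

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models import vgg16
>>> net = vgg16(pretrained=False, num_classes=100, drop_rate=0.2)
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> net(x).shape
(1, 100)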

visformer

+ + +
+ + + +

+ mindcv.models.visformer.Visformer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Visformer model class, based on "Visformer: The Vision-friendly Transformer" <https://arxiv.org/pdf/2104.12533.pdf>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
img_size +
+

input image size. Default: 224.

+
+

+ + TYPE: + int) + +

+
init_channels +
+

number of channels produced by the stem. Default: 32.

+
+

+ + TYPE: + int + + + DEFAULT: + 32 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int) + + + DEFAULT: + 1000 + +

+
embed_dim +
+

embedding dimension in all head. Default: 384.

+
+

+ + TYPE: + int) + + + DEFAULT: + 384 + +

+
depth +
+

model block depth. Default: None.

+
+

+ + TYPE: + int) + + + DEFAULT: + None + +

+
num_heads +
+

number of heads. Default: None.

+
+

+ + TYPE: + int) + + + DEFAULT: + None + +

+
mlp_ratio +
+

ratio of hidden features in Mlp. Default: 4.

+
+

+ + TYPE: + float) + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

have bias in qkv layers or not. Default: False.

+
+

+ + TYPE: + bool) + + + DEFAULT: + False + +

+
qk_scale +
+

Override default qk scale of head_dim ** -0.5 if set.

+
+

+ + TYPE: + float) + + + DEFAULT: + None + +

+
drop_rate +
+

dropout rate. Default: 0.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

attention layers dropout rate. Default: 0.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

drop path rate. Default: 0.1.

+
+

+ + TYPE: + float) + + + DEFAULT: + 0.1 + +

+
attn_stage +
+

block will have an attention layer if value = '1' else not. Default: '1111'.

+
+

+ + TYPE: + str) + + + DEFAULT: + '1111' + +

+
pos_embed +
+

position embedding. Default: True.

+
+

+ + TYPE: + bool) + + + DEFAULT: + True + +

+
spatial_conv +
+

block will have a spatial convolution layer if value = '1' else not. Default: '1111'.

+
+

+ + TYPE: + str) + + + DEFAULT: + '1111' + +

+
group +
+

convolution group. Default: 8.

+
+

+ + TYPE: + int) + + + DEFAULT: + 8 + +

+
pool +
+

if True, use global average pooling for the head. Default: True.

+
+

+ + TYPE: + bool) + + + DEFAULT: + True + +

+
conv_init +
+

if True, initialize convolution weights with HeNormal; otherwise TruncatedNormal. Default: False.

+
+

+ + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/visformer.py +
(lines 210-436)
class Visformer(nn.Cell):
+    r"""Visformer model class, based on
+    '"Visformer: The Vision-friendly Transformer"
+    <https://arxiv.org/pdf/2104.12533.pdf>'
+
+    Args:
+        img_size (int): input image size. Default: 224.
+        init_channels (int): number of channels produced by the stem. Default: 32.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dim (int) : embedding dimension in all head. Default: 384.
+        depth (int) : model block depth. Default: None.
+        num_heads (int) : number of heads. Default: None.
+        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.
+        qkv_bias (bool) : have bias in qkv layers or not. Default: False.
+        qk_scale (float) : Override default qk scale of head_dim ** -0.5 if set.
+        drop_rate (float) : dropout rate. Default: 0.
+        attn_drop_rate (float) : attention layers dropout rate. Default: 0.
+        drop_path_rate (float) : drop path rate. Default: 0.1.
+        attn_stage (str) : block will have an attention layer if value = '1' else not. Default: '1111'.
+        pos_embed (bool) : position embedding. Default: True.
+        spatial_conv (str) : block will have a spatial convolution layer if value = '1' else not. Default: '1111'.
+        group (int) : convolution group. Default: 8.
+        pool (bool) : if True, use global average pooling for the head. Default: True.
+        conv_init : if True, initialize convolution weights with HeNormal; otherwise TruncatedNormal. Default: False.
+    """
+
+    def __init__(
+        self,
+        img_size: int = 224,
+        init_channels: int = 32,
+        num_classes: int = 1000,
+        embed_dim: int = 384,
+        depth: List[int] = None,
+        num_heads: List[int] = None,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        qk_scale: float = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        attn_stage: str = "1111",
+        pos_embed: bool = True,
+        spatial_conv: str = "1111",
+        group: int = 8,
+        pool: bool = True,
+        conv_init: bool = False,
+    ) -> None:
+        super(Visformer, self).__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        self.init_channels = init_channels
+        self.img_size = img_size
+        self.pool = pool
+        self.conv_init = conv_init
+        self.depth = depth
+        assert (isinstance(depth, list) or isinstance(depth, tuple)) and len(depth) == 4
+        if not (isinstance(num_heads, list) or isinstance(num_heads, tuple)):
+            num_heads = [num_heads] * 4
+
+        self.pos_embed = pos_embed
+        dpr = np.linspace(0, drop_path_rate, sum(depth)).tolist()
+
+        self.stem = nn.SequentialCell([
+            nn.Conv2d(3, self.init_channels, 7, 2, pad_mode="pad", padding=3),
+            nn.BatchNorm2d(self.init_channels),
+            nn.ReLU()
+        ])
+        img_size //= 2
+
+        self.pos_drop = Dropout(p=drop_rate)
+        # stage0
+        if depth[0]:
+            self.patch_embed0 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 4)
+            img_size //= 2
+            if self.pos_embed:
+                self.pos_embed0 = mindspore.Parameter(
+                    ops.zeros((1, embed_dim // 4, img_size, img_size), mindspore.float32))
+            self.stage0 = nn.CellList([
+                Block(dim=embed_dim // 4, num_heads=num_heads[0], head_dim_ratio=0.25, mlp_ratio=mlp_ratio,
+                      qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                      group=group, attn_disabled=(attn_stage[0] == "0"), spatial_conv=(spatial_conv[0] == "1"))
+                for i in range(depth[0])
+            ])
+
+        # stage1
+        if depth[0]:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 4,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 2
+        else:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 4
+
+        if self.pos_embed:
+            self.pos_embed1 = mindspore.Parameter(ops.zeros((1, embed_dim // 2, img_size, img_size), mindspore.float32))
+
+        self.stage1 = nn.CellList([
+            Block(
+                dim=embed_dim // 2, num_heads=num_heads[1], head_dim_ratio=0.5, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[1] == "0"), spatial_conv=(spatial_conv[1] == "1")
+            )
+            for i in range(sum(depth[:1]), sum(depth[:2]))
+        ])
+
+        # stage2
+        self.patch_embed2 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 2, embed_dim=embed_dim)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed2 = mindspore.Parameter(ops.zeros((1, embed_dim, img_size, img_size), mindspore.float32))
+        self.stage2 = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads[2], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[2] == "0"), spatial_conv=(spatial_conv[2] == "1")
+            )
+            for i in range(sum(depth[:2]), sum(depth[:3]))
+        ])
+
+        # stage3
+        self.patch_embed3 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim, embed_dim=embed_dim * 2)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed3 = mindspore.Parameter(ops.zeros((1, embed_dim * 2, img_size, img_size), mindspore.float32))
+        self.stage3 = nn.CellList([
+            Block(
+                dim=embed_dim * 2, num_heads=num_heads[3], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[3] == "0"), spatial_conv=(spatial_conv[3] == "1")
+            )
+            for i in range(sum(depth[:3]), sum(depth[:4]))
+        ])
+
+        # head
+        if self.pool:
+            self.global_pooling = GlobalAvgPooling()
+
+        self.norm = nn.BatchNorm2d(embed_dim * 2)
+        self.head = nn.Dense(embed_dim * 2, num_classes)
+
+        # weight init
+        if self.pos_embed:
+            if depth[0]:
+                self.pos_embed0.set_data(initializer(TruncatedNormal(0.02),
+                                                     self.pos_embed0.shape, self.pos_embed0.dtype))
+            self.pos_embed1.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed1.shape, self.pos_embed1.dtype))
+            self.pos_embed2.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed2.shape, self.pos_embed2.dtype))
+            self.pos_embed3.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed3.shape, self.pos_embed3.dtype))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                if self.conv_init:
+                    cell.weight.set_data(initializer(HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape,
+                                                     cell.weight.dtype))
+                else:
+                    cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem(x)
+
+        # stage 0
+        if self.depth[0]:
+            x = self.patch_embed0(x)
+            if self.pos_embed:
+                x = x + self.pos_embed0
+                x = self.pos_drop(x)
+            for b in self.stage0:
+                x = b(x)
+
+        # stage 1
+        x = self.patch_embed1(x)
+        if self.pos_embed:
+            x = x + self.pos_embed1
+            x = self.pos_drop(x)
+        for b in self.stage1:
+            x = b(x)
+
+        # stage 2
+        x = self.patch_embed2(x)
+        if self.pos_embed:
+            x = x + self.pos_embed2
+            x = self.pos_drop(x)
+        for b in self.stage2:
+            x = b(x)
+
+        # stage 3
+        x = self.patch_embed3(x)
+        if self.pos_embed:
+            x = x + self.pos_embed3
+            x = self.pos_drop(x)
+        for b in self.stage3:
+            x = b(x)
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        # head
+        if self.pool:
+            x = self.global_pooling(x)
+        else:
+            x = x[:, :, 0, 0]
+        x = self.head(x.view(x.shape[0], -1))
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
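Because depth, num_heads and the stage flags have no usable defaults (the assertion in __init__ requires a 4-element depth), direct construction mirrors one of the registered configurations; a sketch with the visformer_tiny values from below, import path assumed:

>>> from mindcv.models.visformer import Visformer
>>> net = Visformer(img_size=224, init_channels=16, embed_dim=192,
...                 depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3],
...                 attn_stage="0011", spatial_conv="1100")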
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer small model. +Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 468-479)
@register_model
+def visformer_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=384,
+                      depth=[0, 7, 4, 4], num_heads=[6, 6, 6, 6], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_small_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer small2 model. +Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 482-493)
@register_model
+def visformer_small_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small_v2"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=256,
+                      depth=[1, 10, 14, 3], num_heads=[2, 4, 8, 16], mlp_ratio=4., qk_scale=-0.5,
+                      group=8, attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer tiny model. +Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 439-451)
@register_model
+def visformer_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny"]
+    model = Visformer(img_size=224, init_channels=16, num_classes=num_classes, embed_dim=192,
+                      depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_tiny_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer tiny2 model. +Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 454-465)
@register_model
+def visformer_tiny_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny_v2"]
+    model = Visformer(img_size=224, init_channels=24, num_classes=num_classes, embed_dim=192,
+                      depth=[1, 4, 6, 3], num_heads=[1, 3, 6, 12], mlp_ratio=4., qk_scale=-0.5, group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
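Each of the four registered builders above fixes one published Visformer configuration; a usage sketch, assuming the builders are importable from mindcv.models:

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models import visformer_tiny
>>> net = visformer_tiny(num_classes=10)
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> net(x).shape
(1, 10)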

vit

+ + +
+ + + +

+ mindcv.models.vit.ViT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Vision Transformer architecture implementation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
input_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
patch_size +
+

Patch size of image. Default: 16.

+
+

+ + TYPE: + int + + + DEFAULT: + 16 + +

+
embed_dim +
+

The dimension of embedding. Default: 768.

+
+

+ + TYPE: + int + + + DEFAULT: + 768 + +

+
num_layers +
+

The depth of transformer. Default: 12.

+
+

+ + TYPE: + int + + + DEFAULT: + 12 + +

+
num_heads +
+

The number of attention heads. Default: 12.

+
+

+ + TYPE: + int + + + DEFAULT: + 12 + +

+
mlp_dim +
+

The dimension of MLP hidden layer. Default: 3072.

+
+

+ + TYPE: + int + + + DEFAULT: + 3072 + +

+
keep_prob +
+

The keep rate, greater than 0 and less than or equal to 1. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
attention_keep_prob +
+

The keep rate for attention layer. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
drop_path_keep_prob +
+

The keep rate for drop path. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
activation +
+

Activation function which will be stacked on top of the +normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.GELU + +

+
norm +
+

Norm layer that will be stacked on top of the convolution +layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
pool +
+

The method of pooling. Default: 'cls'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'cls' + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
+
+
+ Outputs +

Tensor of shape :math:(N, 768)

+
+ + + + + + + + + + + + + + + +
RAISESDESCRIPTION
+ + ValueError + + +
+

If split is not 'train', 'test' or 'infer'.

+
+
+ +
+ Supported Platforms +

GPU

+
+ + +

Examples:

+
>>> net = ViT()
+>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+>>> output = net(x)
+>>> print(output.shape)
+(1, 768)
+
+

About ViT:

+

Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image +patches can perform very well on image classification tasks. When pre-trained on large amounts +of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, +CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art +convolutional networks while requiring substantially fewer computational resources to train.

+

Citation:

+

.. code-block::

+
@article{2020An,
+title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+year={2020},
+}
+
+ +
+ Source code in mindcv/models/vit.py +
(lines 468-605)
class ViT(nn.Cell):
+    """
+    Vision Transformer architecture implementation.
+
+    Args:
+        image_size (int): Input image size. Default: 224.
+        input_channels (int): The number of input channels. Default: 3.
+        patch_size (int): Patch size of image. Default: 16.
+        embed_dim (int): The dimension of embedding. Default: 768.
+        num_layers (int): The depth of transformer. Default: 12.
+        num_heads (int): The number of attention heads. Default: 12.
+        mlp_dim (int): The dimension of MLP hidden layer. Default: 3072.
+        keep_prob (float): The keep rate, greater than 0 and less than or equal to 1. Default: 1.0.
+        attention_keep_prob (float): The keep rate for attention layer. Default: 1.0.
+        drop_path_keep_prob (float): The keep rate for drop path. Default: 1.0.
+        activation (nn.Cell): Activation function which will be stacked on top of the
+            normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.
+        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution
+            layer. Default: nn.LayerNorm.
+        pool (str): The method of pooling. Default: 'cls'.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 768)`
+
+    Raises:
+        ValueError: If `split` is not 'train', 'test' or 'infer'.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        >>> net = ViT()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 768)
+
+    About ViT:
+
+    Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image
+    patches can perform very well on image classification tasks. When pre-trained on large amounts
+    of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet,
+    CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art
+    convolutional networks while requiring substantially fewer computational resources to train.
+
+    Citation:
+
+    .. code-block::
+
+        @article{2020An,
+        title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+        author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+        year={2020},
+        }
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        input_channels: int = 3,
+        patch_size: int = 16,
+        embed_dim: int = 768,
+        num_layers: int = 12,
+        num_heads: int = 12,
+        mlp_dim: int = 3072,
+        keep_prob: float = 1.0,
+        attention_keep_prob: float = 1.0,
+        drop_path_keep_prob: float = 1.0,
+        activation: nn.Cell = nn.GELU,
+        norm: Optional[nn.Cell] = nn.LayerNorm,
+        pool: str = "cls",
+    ) -> None:
+        super().__init__()
+
+        self.patch_embedding = PatchEmbedding(image_size=image_size,
+                                              patch_size=patch_size,
+                                              embed_dim=embed_dim,
+                                              input_channels=input_channels)
+        num_patches = self.patch_embedding.num_patches
+
+        if pool == "cls":
+            self.cls_token = init(init_type=Normal(sigma=1.0),
+                                  shape=(1, 1, embed_dim),
+                                  dtype=ms.float32,
+                                  name="cls",
+                                  requires_grad=True)
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches + 1, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.concat = ops.Concat(axis=1)
+        else:
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.mean = ops.ReduceMean(keep_dims=False)
+
+        self.pool = pool
+        self.pos_dropout = Dropout(p=1.0-keep_prob)
+        self.norm = norm((embed_dim,))
+        self.tile = ops.Tile()
+        self.transformer = TransformerEncoder(
+            dim=embed_dim,
+            num_layers=num_layers,
+            num_heads=num_heads,
+            mlp_dim=mlp_dim,
+            keep_prob=keep_prob,
+            attention_keep_prob=attention_keep_prob,
+            drop_path_keep_prob=drop_path_keep_prob,
+            activation=activation,
+            norm=norm,
+        )
+
+    def construct(self, x):
+        """ViT construct."""
+        x = self.patch_embedding(x)
+
+        if self.pool == "cls":
+            cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+            x = self.concat((cls_tokens, x))
+            x += self.pos_embedding
+        else:
+            x += self.pos_embedding
+        x = self.pos_dropout(x)
+        x = self.transformer(x)
+        x = self.norm(x)
+
+        if self.pool == "cls":
+            x = x[:, 0]
+        else:
+            x = self.mean(x, (1, ))  # (1,) or (1,2)
+        return x
+
+
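The Examples block above covers the default "cls" pooling. Per the construct code, any other pool value takes the token-mean branch, so the positional embedding is created without a class token; a sketch of that variant (behaviour inferred from the code above, with ms and np imported as in the example):

>>> net = ViT(pool="mean")
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> net(x).shape
(1, 768)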
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.vit.ViT.construct(x) + +

+ + +
+ +

ViT construct.

+ +
+ Source code in mindcv/models/vit.py +
(lines 587-605)
def construct(self, x):
+    """ViT construct."""
+    x = self.patch_embedding(x)
+
+    if self.pool == "cls":
+        cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+        x = self.concat((cls_tokens, x))
+        x += self.pos_embedding
+    else:
+        x += self.pos_embedding
+    x = self.pos_dropout(x)
+    x = self.transformer(x)
+    x = self.norm(x)
+
+    if self.pool == "cls":
+        x = x[:, 0]
+    else:
+        x = self.mean(x, (1, ))  # (1,) or (1,2)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_b_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

Constructs a vit_b_16 architecture from +An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether to download and load the pre-trained model. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
image_size +
+

The input image size. Default: 224 for ImageNet.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
has_logits +
+

Whether to include the pre-logits representation layer. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
drop_rate +
+

The dropout rate. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

The stochastic depth rate. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + ViT + + +
+

ViT network, MindSpore.nn.Cell

+
+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
+
+ + +

Examples:

+
>>> net = vit_b_16_224()
+>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+>>> output = net(x)
+>>> print(output.shape)
+(1, 1000)
+
+ +
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out})

+
+
+ Supported Platforms +

GPU

+
+
+ Source code in mindcv/models/vit.py +
(lines 663-724)
@register_model
+def vit_b_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """
+    Constructs a vit_b_16 architecture from
+    `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.
+
+    Args:
+        pretrained (bool): Whether to download and load the pre-trained model. Default: False.
+        num_classes (int): The number of classification classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+        image_size (int): The input image size. Default: 224 for ImageNet.
+        has_logits (bool): Whether to include the pre-logits representation layer. Default: False.
+        drop_rate (float): The dropout rate. Default: 0.0.
+        drop_path_rate (float): The stochastic depth rate. Default: 0.0.
+
+    Returns:
+        ViT network, MindSpore.nn.Cell
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Examples:
+        >>> net = vit_b_16_224()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 1000)
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`
+
+    Supported Platforms:
+        ``GPU``
+    """
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_224"]
+
+    return vit(**config)
+
+
+
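The builder only assembles a ConfigDict and delegates to the vit factory; the registered name can also be resolved through the model factory. A sketch assuming MindCV exposes create_model at the package top level:

>>> import mindcv
>>> net = mindcv.create_model("vit_b_16_224", pretrained=False, num_classes=10)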
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_b_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 727-757)
@register_model
+def vit_b_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_384"]
+
+    return vit(**config)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_b_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 828-858)
@register_model
+def vit_b_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_224"]
+
+    return vit(**config)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_b_32_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 861-891)
@register_model
+def vit_b_32_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention_dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention_dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_384"]
+
+    return vit(**config)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_l_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 760-791)
@register_model
+def vit_l_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_224"]
+
+    return vit(**config)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_l_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 794-825)
@register_model
+def vit_l_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_384"]
+
+    return vit(**config)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_l_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
(lines 894-924)
@register_model
+def vit_l_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_32_224"]
+
+    return vit(**config)
+
+
+
+ +
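The remaining ViT builders differ only in the patch size (16/32), model size (b/l) and input resolution (224/384) encoded in their names; the 384-pixel variants expect inputs matching image_size. A sketch (the output shape assumes the vit factory attaches the classification head, as in the vit_b_16_224 example above):

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models import vit_l_16_384
>>> net = vit_l_16_384()
>>> x = ms.Tensor(np.ones([1, 3, 384, 384]), ms.float32)
>>> net(x).shape
(1, 1000)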

volo

+ + +
+ + + +

+ mindcv.models.volo.VOLO + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Vision Outlooker, the main class of our model.
--layers: [x,x,x,x], four blocks in two stages; the first block is outlooker, the other three are transformer. We set four blocks, which are easily applied to downstream tasks.
--img_size, --in_channels, --num_classes: these three are very easy to understand.
--patch_size: patch_size in outlook attention.
--stem_hidden_dim: hidden dim of patch embedding, 64 for d1-d4, 128 for d5.
--embed_dims, --num_heads: embedding dim and number of heads in each block.
--downsamples: flags to apply downsampling or not.
--outlook_attention: flags to apply outlook attention or not.
--mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand.
--attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand.
--post_layers: post layers such as two class-attention layers using [ca, ca]; if set, return_mean=False.
--return_mean: use the mean of all feature tokens for classification; if yes, no class token.
--return_dense: use token labeling, see https://github.com/zihangJiang/TokenLabeling.
--mix_token: mix tokens as in token labeling, see https://github.com/zihangJiang/TokenLabeling.
--pooling_scale: pooling_scale=2 means we downsample 2x.
--out_kernel, --out_stride, --out_padding: kernel size, stride, and padding for outlook attention.

+ +
+ Source code in mindcv/models/volo.py +
(lines 550-742)
class VOLO(nn.Cell):
+    """
+    Vision Outlooker, the main class of our model
+    --layers: [x,x,x,x], four blocks in two stages, the first block is outlooker, the
+              other three are transformer, we set four blocks, which are easily
+              applied to downstream tasks
+    --img_size, --in_channels, --num_classes: these three are very easy to understand
+    --patch_size: patch_size in outlook attention
+    --stem_hidden_dim: hidden dim of patch embedding, d1-d4 is 64, d5 is 128
+    --embed_dims, --num_heads: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand
+    --attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand
+    --post_layers: post layers like two class attention layers using [ca, ca],
+                  if yes, return_mean=False
+    --return_mean: use mean of all feature tokens for classification, if yes, no class token
+    --return_dense: use token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --mix_token: mixing tokens as token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --pooling_scale: pooling_scale=2 means we downsample 2x
+    --out_kernel, --out_stride, --out_padding: kernel size,
+                                               stride, and padding for outlook attention
+    """
+    def __init__(
+        self,
+        layers,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        patch_size=8,
+        stem_hidden_dim=64,
+        embed_dims=None,
+        num_heads=None,
+        downsamples=None,
+        outlook_attention=None,
+        mlp_ratios=None,
+        qkv_bias=False,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=nn.LayerNorm,
+        post_layers=None,
+        return_mean=False,
+        return_dense=True,
+        mix_token=True,
+        pooling_scale=2,
+        out_kernel=3,
+        out_stride=2,
+        out_padding=1,
+    ) -> None:
+
+        super().__init__()
+        self.num_classes = num_classes
+        self.patch_embed = PatchEmbed(stem_conv=True, stem_stride=2, patch_size=patch_size,
+                                      in_channels=in_channels, hidden_dim=stem_hidden_dim,
+                                      embed_dim=embed_dims[0])
+        # initial positional encoding, we add positional encoding after outlooker blocks
+        self.pos_embed = Parameter(
+            ops.zeros((1, img_size // patch_size // pooling_scale,
+                      img_size // patch_size // pooling_scale,
+                      embed_dims[-1]), mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            if outlook_attention[i]:
+                # stage 1
+                stage = outlooker_blocks(Outlooker, i, embed_dims[i], layers,
+                                         downsample=downsamples[i], num_heads=num_heads[i],
+                                         kernel_size=out_kernel, stride=out_stride,
+                                         padding=out_padding, mlp_ratio=mlp_ratios[i],
+                                         qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                         attn_drop=attn_drop_rate, norm_layer=norm_layer)
+                network.append(stage)
+            else:
+                # stage 2
+                stage = transformer_blocks(Transformer, i, embed_dims[i], layers,
+                                           num_heads[i], mlp_ratio=mlp_ratios[i],
+                                           qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                           drop_path_rate=drop_path_rate,
+                                           attn_drop=attn_drop_rate,
+                                           norm_layer=norm_layer)
+                network.append(stage)
+
+            if downsamples[i]:
+                # downsampling between two stages
+                network.append(Downsample(embed_dims[i], embed_dims[i + 1], 2))
+
+        self.network = nn.CellList(network)
+
+        # set post block, for example, class attention layers
+        self.post_network = None
+        if post_layers is not None:
+            self.post_network = nn.CellList([
+                get_block(post_layers[i],
+                          dim=embed_dims[-1],
+                          num_heads=num_heads[-1],
+                          mlp_ratio=mlp_ratios[-1],
+                          qkv_bias=qkv_bias,
+                          qk_scale=qk_scale,
+                          attn_drop=attn_drop_rate,
+                          drop_path=0.0,
+                          norm_layer=norm_layer)
+                for i in range(len(post_layers))
+            ])
+            self.cls_token = Parameter(ops.zeros((1, 1, embed_dims[-1]), mstype.float32))
+            self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.cls_token.data.shape))
+
+        # set output type
+        self.return_mean = return_mean  # if yes, return mean, not use class token
+        self.return_dense = return_dense  # if yes, return class token and all feature tokens
+        if return_dense:
+            assert not return_mean, "cannot return both mean and dense"
+        self.mix_token = mix_token
+        self.pooling_scale = pooling_scale
+        if mix_token:  # enable token mixing, see token labeling for details.
+            self.beta = 1.0
+            assert return_dense, "return all tokens if mix_token is enabled"
+        if return_dense:
+            self.aux_head = nn.Dense(
+                embed_dims[-1],
+                num_classes) if num_classes > 0 else Identity()
+        self.norm = norm_layer([embed_dims[-1]])
+
+        # Classifier head
+        self.head = nn.Dense(
+            embed_dims[-1], num_classes) if num_classes > 0 else Identity()
+
+        self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.pos_embed.data.shape))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.TruncatedNormal(sigma=.02), m.weight.data.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+
+    def forward_embeddings(self, x: Tensor) -> Tensor:
+        # patch embedding
+        x = self.patch_embed(x)
+        # B,C,H,W-> B,H,W,C
+        x = ops.transpose(x, (0, 2, 3, 1))
+        return x
+
+    def forward_tokens(self, x: Tensor) -> Tensor:
+        for idx, block in enumerate(self.network):
+            if idx == 2:  # add positional encoding after outlooker blocks
+                x = x + self.pos_embed
+                x = self.pos_drop(x)
+            x = block(x)
+
+        B, H, W, C = x.shape
+        x = ops.reshape(x, (B, -1, C))
+        return x
+
+    def forward_cls(self, x: Tensor) -> Tensor:
+        # B, N, C = x.shape
+        cls_tokens = ops.broadcast_to(self.cls_token, (x.shape[0], -1, -1))
+        x = ops.Cast()(x, cls_tokens.dtype)
+        x = ops.concat([cls_tokens, x], 1)
+        for block in self.post_network:
+            x = block(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        # step1: patch embedding
+        x = self.forward_embeddings(x)
+
+        # step2: tokens learning in the two stages
+        x = self.forward_tokens(x)
+
+        # step3: post network, apply class attention or not
+        if self.post_network is not None:
+            x = self.forward_cls(x)
+        x = self.norm(x)
+
+        if self.return_mean:  # if no class token, return mean
+            return self.head(ops.mean(x, 1))
+
+        x_cls = self.head(x[:, 0])
+        if not self.return_dense:
+            return x_cls
+
+        return x_cls
+
+
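layers, embed_dims, num_heads, downsamples, outlook_attention and mlp_ratios all default to None, so direct construction must mirror one of the registered configurations; a sketch using the volo_d1 values from below, import path assumed:

>>> from mindcv.models.volo import VOLO
>>> net = VOLO(layers=[4, 4, 8, 2], embed_dims=[192, 384, 384, 384],
...            num_heads=[6, 12, 12, 12], mlp_ratios=[3, 3, 3, 3],
...            downsamples=[True, False, False, False],
...            outlook_attention=[True, False, False, False],
...            post_layers=['ca', 'ca'])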
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D1 model, Params: 27M.
--layers: [x,x,x,x], four blocks in two stages; the first block is outlooker, the other three are transformer.
--embed_dims, --num_heads: embedding dim and number of heads in each block.
--downsamples: flags to apply downsampling or not in the four blocks.
--outlook_attention: flags to apply outlook attention or not.
--mlp_ratios: mlp ratio in the four blocks.
--post_layers: post layers such as two class-attention layers using [ca, ca].
See the class VOLO() for details of all arguments.

+ +
+ Source code in mindcv/models/volo.py +
(lines 745-776)
@register_model
+def volo_d1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D1 model, Params: 27M
+    --layers: [x,x,x,x], four blocks in two stages, the first stage(block) is outlooker,
+            the other three blocks are transformer, we set four blocks, which are easily
+             applied to downstream tasks
+    --embed_dims, --num_heads,: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not in four blocks
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios: mlp ratio in four blocks
+    --post_layers: post layers like two class attention layers using [ca, ca]
+    See detail for all args in the class VOLO()
+    """
+    default_cfg = default_cfgs['volo_d1']
+
+    # first block is outlooker (stage1), the other three are transformer (stage2)
+    model = VOLO(layers=[4, 4, 8, 2],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[192, 384, 384, 384],
+                 num_heads=[6, 12, 12, 12],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
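As a quick illustration of the factory function above, here is a minimal usage sketch. It assumes MindSpore is installed and that volo_d1 can be imported from mindcv.models.volo, as the source path above suggests; the dummy input and printed shape are illustrative only.

```python
import numpy as np
import mindspore as ms
from mindcv.models.volo import volo_d1

# Build VOLO-D1 without pretrained weights (pretrained=True needs network access).
model = volo_d1(pretrained=False, num_classes=1000, in_channels=3)
model.set_train(False)

# Run a dummy 224x224 RGB batch through the network.
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)
print(logits.shape)  # expected: (1, 1000)
```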
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D2 model, Params: 59M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D2 model, Params: 59M
+    """
+    default_cfg = default_cfgs['volo_d2']
+    model = VOLO(layers=[6, 4, 10, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D3 model, Params: 86M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D3 model, Params: 86M
+    """
+    default_cfg = default_cfgs['volo_d3']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D4 model, Params: 193M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D4 model, Params: 193M
+    """
+    default_cfg = default_cfgs['volo_d4']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.volo.volo_d5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

VOLO-D5 model, Params: 296M. stem_hidden_dim=128: the hidden dim of the patch embedding stem is 128 for VOLO-D5.

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D5 model, Params: 296M
+    stem_hidden_dim=128, the dim in patch embedding is 128 for VOLO-D5
+    """
+    default_cfg = default_cfgs['volo_d5']
+    model = VOLO(layers=[12, 12, 20, 4],
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[4, 4, 4, 4],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 stem_hidden_dim=128,
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +

xcit

+ + +
+ + + +

+ mindcv.models.xcit.XCiT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

XCiT model class, based on "XCiT: Cross-Covariance Image Transformers" (https://arxiv.org/abs/2106.09681).

| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| img_size | input image size | (int, tuple) | 224 |
| patch_size | patch size | (int, tuple) | 16 |
| in_chans | number of input channels | int | 3 |
| num_classes | number of classes for classification head | int | 1000 |
| embed_dim | embedding dimension | int | 768 |
| depth | depth of transformer | int | 12 |
| num_heads | number of attention heads | int | 12 |
| mlp_ratio | ratio of mlp hidden dim to embedding dim | int | 4.0 |
| qkv_bias | enable bias for qkv if True | bool | True |
| qk_scale | override default qk scale of head_dim ** -0.5 if set | float | None |
| drop_rate | dropout rate | float | 0.0 |
| attn_drop_rate | attention dropout rate | float | 0.0 |
| drop_path_rate | stochastic depth rate | float | 0.0 |
| norm_layer | normalization layer | nn.Cell | None |
| cls_attn_layers | depth of class attention layers | int | 2 |
| use_pos | whether to use positional encoding | bool | True |
| eta | layerscale initialization value | float | None |
| tokens_norm | whether to normalize all tokens or just the cls_token in the CA | bool | False |
+ +
+ Source code in mindcv/models/xcit.py +
class XCiT(nn.Cell):
+    r"""XCiT model class, based on
+    `"XCiT: Cross-Covariance Image Transformers" <https://arxiv.org/abs/2106.09681>`_
+    Args:
+        img_size (int, tuple): input image size
+        patch_size (int, tuple): patch size
+        in_chans (int): number of input channels
+        num_classes (int): number of classes for classification head
+        embed_dim (int): embedding dimension
+        depth (int): depth of transformer
+        num_heads (int): number of attention heads
+        mlp_ratio (int): ratio of mlp hidden dim to embedding dim
+        qkv_bias (bool): enable bias for qkv if True
+        qk_scale (float): override default qk scale of head_dim ** -0.5 if set
+        drop_rate (float): dropout rate
+        attn_drop_rate (float): attention dropout rate
+        drop_path_rate (float): stochastic depth rate
+        norm_layer: (nn.Module): normalization layer
+        cls_attn_layers: (int) Depth of Class attention layers
+        use_pos: (bool) whether to use positional encoding
+        eta: (float) layerscale initialization value
+        tokens_norm: (bool) Whether to normalize all tokens or just the cls_token in the CA
+    """
+
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_chans: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: int = 4.,
+                 qkv_bias: bool = True,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = None,
+                 cls_attn_layers: int = 2,
+                 use_pos: bool = True,
+                 patch_proj: str = 'linear',
+                 eta: float = None,
+                 tokens_norm: bool = False):
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        norm_layer = norm_layer or partial(nn.LayerNorm, epsilon=1e-6)
+
+        self.patch_embed = ConvPatchEmbed(img_size=img_size, embed_dim=embed_dim,
+                                          patch_size=patch_size)
+
+        num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(
+            ops.zeros((1, 1, embed_dim), mstype.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+        self.blocks = nn.CellList([
+            XCABlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                norm_layer=norm_layer, num_tokens=num_patches, eta=eta)
+            for i in range(depth)])
+
+        self.cls_attn_blocks = nn.CellList([
+            ClassAttentionBlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer,
+                eta=eta, tokens_norm=tokens_norm)
+            for i in range(cls_attn_layers)])
+        self.norm = norm_layer([embed_dim])
+        self.head = nn.Dense(
+            in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else ops.Identity()
+
+        self.pos_embeder = PositionalEncodingFourier(dim=embed_dim)
+        self.use_pos = use_pos
+
+        # Classifier head
+        self.cls_token.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                        self.cls_token.shape,
+                                                        self.cls_token.dtype))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = weight_init.initializer(weight_init.TruncatedNormal(
+                    sigma=0.02), m.weight.shape, mindspore.float32)
+                if m.bias is not None:
+                    m.bias.set_data(weight_init.initializer(
+                        weight_init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(weight_init.initializer(
+                    weight_init.Constant(0), m.beta.shape))
+                m.gamma.set_data(weight_init.initializer(
+                    weight_init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x):
+        B, C, H, W = x.shape
+        x, (Hp, Wp) = self.patch_embed(x)
+        if self.use_pos:
+            pos_encoding = self.pos_embeder(B, Hp, Wp).reshape(
+                B, -1, x.shape[1]).transpose(0, 2, 1)
+            x = x + pos_encoding
+        x = self.pos_drop(x)
+        for blk in self.blocks:
+            x = blk(x, Hp, Wp)
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+        cls_tokens = ops.cast(cls_tokens, x.dtype)
+        x = ops.concat((cls_tokens, x), 1)
+
+        for blk in self.cls_attn_blocks:
+            x = blk(x, Hp, Wp)
+        return self.norm(x)[:, 0]
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.xcit.xcit_tiny_12_p16_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get xcit_tiny_12_p16_224 model. Refer to the base class 'models.XCiT' for more details.

+ +
+ Source code in mindcv/models/xcit.py +
@register_model
+def xcit_tiny_12_p16_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> XCiT:
+    """Get xcit_tiny_12_p16_224 model.
+    Refer to the base class 'models.XCiT' for more details.
+    """
+    default_cfg = default_cfgs['xcit_tiny_12_p16_224']
+    model = XCiT(
+        patch_size=16, num_classes=num_classes, embed_dim=192, depth=12, num_heads=4, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), eta=1.0, tokens_norm=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg,
+                        num_classes=num_classes, in_channels=in_channels)
+
+    return model
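For completeness, a minimal usage sketch of this factory function, assuming xcit_tiny_12_p16_224 is importable from mindcv.models.xcit as the source path above suggests (input and output shapes are illustrative):

```python
import numpy as np
import mindspore as ms
from mindcv.models.xcit import xcit_tiny_12_p16_224

# Tiny XCiT variant: embed_dim=192, depth=12, 4 heads, 16x16 patches.
net = xcit_tiny_12_p16_224(pretrained=False, num_classes=1000)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
out = net(x)
print(out.shape)  # expected: (1, 1000)
```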
+
+
+
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/reference/optim/index.html b/reference/optim/index.html new file mode 100644 index 000000000..8c7fc3997 --- /dev/null +++ b/reference/optim/index.html @@ -0,0 +1,2587 @@ + + + + + + + + + + + + + + + + + + + + + + + + optim - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Optimizer

+

Optimizer Factory

+ + + +
+ + + +

+mindcv.optim.optim_factory.create_optimizer(params, opt='adam', lr=0.001, weight_decay=0, momentum=0.9, nesterov=False, filter_bias_and_bn=True, loss_scale=1.0, schedule_decay=0.004, checkpoint_path='', eps=1e-10, **kwargs) + +

+ + +
+ +

Creates optimizer by name.

| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| params | network parameters. Union[list[Parameter], list[dict]]: must be a list of Parameters or a list of dicts. When a list element is a dict, its keys can be "params", "lr", "weight_decay", "grad_centralization" and "order_params". | | |
| opt | wrapped optimizer. Options include 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion', 'rmsprop', 'adagrad' and 'lamb'. 'adam' is the default choice for convolution-based networks, while 'adamw' is recommended for ViT-based networks. | str | 'adam' |
| lr | learning rate: float or lr scheduler. Fixed and dynamic learning rates are supported. | Optional[float] | 0.001 |
| weight_decay | weight decay factor. It can be a constant value or a Cell; it is a Cell only when dynamic weight decay is applied. Dynamic weight decay works like dynamic learning rate: the user customizes a weight decay schedule whose only input is the global step, and during training the optimizer calls that WeightDecaySchedule instance to get the weight decay value for the current step. | float | 0 |
| momentum | momentum, if the optimizer supports it. | float | 0.9 |
| nesterov | whether to use the Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. | bool | False |
| filter_bias_and_bn | whether to exclude bias and batch-norm parameters from weight decay. If True, weight decay is not applied to BN parameters or to the bias of Conv or Dense layers. | bool | True |
| loss_scale | a floating point value for the loss scale; must be larger than 0.0. | float | 1.0 |

Returns: Optimizer object.
+ +
+ Source code in mindcv/optim/optim_factory.py +
def create_optimizer(
+    params,
+    opt: str = "adam",
+    lr: Optional[float] = 1e-3,
+    weight_decay: float = 0,
+    momentum: float = 0.9,
+    nesterov: bool = False,
+    filter_bias_and_bn: bool = True,
+    loss_scale: float = 1.0,
+    schedule_decay: float = 4e-3,
+    checkpoint_path: str = "",
+    eps: float = 1e-10,
+    **kwargs,
+):
+    r"""Creates optimizer by name.
+
+    Args:
+        params: network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters
+            or list of dicts. When the list element is a dictionary, the key of the dictionary can be
+            "params", "lr", "weight_decay","grad_centralization" and "order_params".
+        opt: wrapped optimizer. You could choose like 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion',
+            'rmsprop', 'adagrad', 'lamb'. 'adam' is the default choice for convolution-based networks.
+            'adamw' is recommended for ViT-based networks. Default: 'adam'.
+        lr: learning rate: float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3.
+        weight_decay: weight decay factor. It should be noted that weight decay can be a constant value or a Cell.
+            It is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to
+            dynamic learning rate, users need to customize a weight decay schedule only with global step as input,
+            and during training, the optimizer calls the instance of WeightDecaySchedule to get the weight decay value
+            of current step. Default: 0.
+        momentum: momentum if the optimizer supports. Default: 0.9.
+        nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.
+        filter_bias_and_bn: whether to filter batch norm parameters and bias from weight decay.
+            If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True.
+        loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0.
+
+    Returns:
+        Optimizer object
+    """
+
+    opt = opt.lower()
+
+    if weight_decay and filter_bias_and_bn:
+        params = init_group_params(params, weight_decay)
+
+    opt_args = dict(**kwargs)
+    # if lr is not None:
+    #    opt_args.setdefault('lr', lr)
+
+    # non-adaptive: SGD, momentum, and nesterov
+    if opt == "sgd":
+        # note: nn.Momentum may perform better if momentum > 0.
+        optimizer = nn.SGD(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            nesterov=nesterov,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt in ["momentum", "nesterov"]:
+        optimizer = nn.Momentum(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            use_nesterov=nesterov,
+            loss_scale=loss_scale,
+        )
+    # adaptive
+    elif opt == "adam":
+        optimizer = nn.Adam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            use_nesterov=nesterov,
+            **opt_args,
+        )
+    elif opt == "adamw":
+        optimizer = AdamW(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lion":
+        optimizer = Lion(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "nadam":
+        optimizer = NAdam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            schedule_decay=schedule_decay,
+            **opt_args,
+        )
+    elif opt == "adan":
+        optimizer = Adan(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "rmsprop":
+        optimizer = nn.RMSProp(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            epsilon=eps,
+            **opt_args,
+        )
+    elif opt == "adagrad":
+        optimizer = nn.Adagrad(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lamb":
+        assert loss_scale == 1.0, "Loss scaler is not supported by Lamb optimizer"
+        optimizer = nn.Lamb(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            **opt_args,
+        )
+    else:
+        raise ValueError(f"Invalid optimizer: {opt}")
+
+    if os.path.exists(checkpoint_path):
+        param_dict = load_checkpoint(checkpoint_path)
+        load_param_into_net(optimizer, param_dict)
+
+    return optimizer
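A minimal sketch of how this factory is typically called, assuming create_optimizer is importable from mindcv.optim.optim_factory as documented above; the toy network and hyperparameters are illustrative only:

```python
import mindspore.nn as nn
from mindcv.optim.optim_factory import create_optimizer

# A toy network standing in for a real model.
net = nn.Dense(16, 4)

# AdamW with weight decay; since filter_bias_and_bn defaults to True,
# bias and BN parameters are placed in a no-decay parameter group.
optimizer = create_optimizer(
    net.trainable_params(),
    opt="adamw",
    lr=1e-3,
    weight_decay=0.05,
)
```

The returned object is a regular MindSpore optimizer and can be passed to mindspore.Model or used in a custom training loop.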
+
+
+
+ +

AdamW

+ + +
+ + + +

+ mindcv.optim.adamw.AdamW + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implements the AdamWeightDecay (AdamW) optimizer with optional gradient clipping by global norm.

+ +
+ Source code in mindcv/optim/adamw.py +
class AdamW(Optimizer):
+    """
+    Implements the gradient clipping by norm for a AdamWeightDecay optimizer.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="adam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="adam_v", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(
+                    _adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr, self.weight_decay
+                ),
+                self.parameters,
+                self.moments1,
+                self.moments2,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
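A small usage sketch of this class, assuming it is importable from mindcv.optim.adamw as the source path above suggests:

```python
import mindspore.nn as nn
from mindcv.optim.adamw import AdamW

net = nn.Dense(8, 2)

# clip=True enables the global-norm gradient clipping (max norm 5.0) applied in construct().
optimizer = AdamW(net.trainable_params(), learning_rate=1e-3,
                  beta1=0.9, beta2=0.999, weight_decay=0.05, clip=True)
```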
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Adan

+ + +
+ + + +

+ mindcv.optim.adan.Adan + + +

+ + +
+

+ Bases: Optimizer

+ + +

The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677

+

Note: it is an experimental version.

+ +
+ Source code in mindcv/optim/adan.py +
class Adan(Optimizer):
+    """
+    The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677
+
+    Note: it is an experimental version.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.98,
+        beta2=0.92,
+        beta3=0.99,
+        eps=1e-8,
+        use_locking=False,
+        weight_decay=0.0,
+        loss_scale=1.0,
+    ):
+        super().__init__(
+            learning_rate, params, weight_decay=weight_decay, loss_scale=loss_scale
+        )  # The optimizer's inherited weight decay handling is blocked; weight decay is computed in this file.
+
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        assert isinstance(use_locking, bool), f"For {self.cls_name}, use_locking should be bool"
+
+        self.beta1 = Tensor(beta1, mstype.float32)
+        self.beta2 = Tensor(beta2, mstype.float32)
+        self.beta3 = Tensor(beta3, mstype.float32)
+
+        self.eps = Tensor(eps, mstype.float32)
+        self.use_locking = use_locking
+        self.moment1 = self._parameters.clone(prefix="moment1", init="zeros")  # m
+        self.moment2 = self._parameters.clone(prefix="moment2", init="zeros")  # v
+        self.moment3 = self._parameters.clone(prefix="moment3", init="zeros")  # n
+        self.prev_gradient = self._parameters.clone(prefix="prev_gradient", init="zeros")
+
+        self.weight_decay = Tensor(weight_decay, mstype.float32)
+
+    def construct(self, gradients):
+        params = self._parameters
+        moment1 = self.moment1
+        moment2 = self.moment2
+        moment3 = self.moment3
+
+        gradients = self.flatten_gradients(gradients)
+        gradients = self.gradients_centralization(gradients)
+        gradients = self.scale_grad(gradients)
+        gradients = self._grad_sparse_indices_deduplicate(gradients)
+        lr = self.get_lr()
+
+        # TODO: currently not support dist
+        success = self.map_(
+            ops.partial(_adan_opt, self.beta1, self.beta2, self.beta3, self.eps, lr, self.weight_decay),
+            params,
+            moment1,
+            moment2,
+            moment3,
+            gradients,
+            self.prev_gradient,
+        )
+
+        return success
+
+    @Optimizer.target.setter
+    def target(self, value):
+        """
+        If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+        optimizer operation.
+        """
+        self._set_base_target(value)
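A minimal construction sketch, assuming Adan is importable from mindcv.optim.adan as the source path above suggests (the hyperparameters shown are the documented defaults plus an illustrative weight decay, not tuned values):

```python
import mindspore.nn as nn
from mindcv.optim.adan import Adan

net = nn.Dense(8, 2)

# Adan keeps three moment buffers (m, v, n) plus the previous gradient per parameter,
# as created in __init__ above.
optimizer = Adan(net.trainable_params(), learning_rate=1e-3,
                 beta1=0.98, beta2=0.92, beta3=0.99, weight_decay=0.02)
```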
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.optim.adan.Adan.target(value) + +

+ + +
+ +

If the input value is set to "CPU", the parameters will be updated on the host using the fused optimizer operation.

+ +
+ Source code in mindcv/optim/adan.py +
@Optimizer.target.setter
+def target(self, value):
+    """
+    If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+    optimizer operation.
+    """
+    self._set_base_target(value)
+
+
+
+ +
+ + + +
+ +
+ +

Lion

+ + +
+ + + +

+ mindcv.optim.lion.Lion + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implementation of the Lion optimizer from the paper https://arxiv.org/abs/2302.06675. This implementation additionally supports gradient clipping.

Notes: the lr is usually 3-10x smaller than for AdamW, and the weight decay is usually 3-10x larger than for AdamW.

+ +
+ Source code in mindcv/optim/lion.py +
class Lion(Optimizer):
+    """
+    Implementation of Lion optimizer from paper 'https://arxiv.org/abs/2302.06675'.
+    Additionally, this implementation is with gradient clipping.
+
+    Notes:
+    lr is usually 3-10x smaller than adamw.
+    weight decay is usually 3-10x larger than adamw.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-4,
+        beta1=0.9,
+        beta2=0.99,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="lion_m", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr, self.weight_decay),
+                self.parameters,
+                self.moments1,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
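To make the scaling note above concrete: if a comparable AdamW run used lr=1e-3 and weight_decay=0.05, a Lion run would typically use an lr around 1e-4 to 3e-4 and a weight decay around 0.15 to 0.5. The sketch below follows that rule of thumb; the numbers are illustrative, not tuned values from the source, and it assumes Lion is importable from mindcv.optim.lion:

```python
import mindspore.nn as nn
from mindcv.optim.lion import Lion

net = nn.Dense(8, 2)

# lr ~3-10x smaller and weight_decay ~3-10x larger than the AdamW baseline.
optimizer = Lion(net.trainable_params(), learning_rate=2e-4,
                 beta1=0.9, beta2=0.99, weight_decay=0.3, clip=False)
```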
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

NAdam

+ + +
+ + + +

+ mindcv.optim.nadam.NAdam + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).

+ +
+ Source code in mindcv/optim/nadam.py +
class NAdam(Optimizer):
+    """
+    Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        schedule_decay=4e-3,
+    ):
+        super().__init__(learning_rate, params, weight_decay, loss_scale)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="nadam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="nadam_v", init="zeros")
+        self.schedule_decay = Tensor(np.array([schedule_decay]).astype(np.float32))
+        self.mu_schedule = Parameter(initializer(1, [1], ms.float32), name="mu_schedule")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        params = self.parameters
+        step = self.global_step + _scaler_one
+        gradients = self.decay_weight(gradients)
+        mu = self.beta1 * (
+            _scaler_one - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), step * self.schedule_decay)
+        )
+        mu_next = self.beta1 * (
+            _scaler_one
+            - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), (step + _scaler_one) * self.schedule_decay)
+        )
+        mu_schedule = self.mu_schedule * mu
+        mu_schedule_next = self.mu_schedule * mu * mu_next
+        self.mu_schedule = mu_schedule
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        num_params = len(params)
+        for i in range(num_params):
+            ops.assign(self.moments1[i], self.beta1 * self.moments1[i] + (_scaler_one - self.beta1) * gradients[i])
+            ops.assign(
+                self.moments2[i], self.beta2 * self.moments2[i] + (_scaler_one - self.beta2) * ops.square(gradients[i])
+            )
+
+            regulate_m = mu_next * self.moments1[i] / (_scaler_one - mu_schedule_next) + (_scaler_one - mu) * gradients[
+                i
+            ] / (_scaler_one - mu_schedule)
+            regulate_v = self.moments2[i] / (_scaler_one - beta2_power)
+
+            update = params[i] - lr * regulate_m / (self.eps + ops.sqrt(regulate_v))
+            ops.assign(params[i], update)
+
+        return params
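A brief construction sketch, assuming NAdam is importable from mindcv.optim.nadam as the source path above suggests:

```python
import mindspore.nn as nn
from mindcv.optim.nadam import NAdam

net = nn.Dense(8, 2)

# schedule_decay controls how the Nesterov momentum coefficient mu evolves over steps.
optimizer = NAdam(net.trainable_params(), learning_rate=2e-3,
                  beta1=0.9, beta2=0.999, schedule_decay=4e-3)
```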
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/reference/scheduler/index.html b/reference/scheduler/index.html new file mode 100644 index 000000000..9f771e3b8 --- /dev/null +++ b/reference/scheduler/index.html @@ -0,0 +1,2599 @@ + + + + + + + + + + + + + + + + + + + + + + + + scheduler - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Learning Rate Scheduler

+

Scheduler Factory

+ + + +
+ + + +

+mindcv.scheduler.scheduler_factory.create_scheduler(steps_per_epoch, scheduler='constant', lr=0.01, min_lr=1e-06, warmup_epochs=3, warmup_factor=0.0, decay_epochs=10, decay_rate=0.9, milestones=None, num_epochs=200, num_cycles=1, cycle_decay=1.0, lr_epoch_stair=False) + +

+ + +
+ +

Creates learning rate scheduler by name.

| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| steps_per_epoch | number of steps per epoch. | int | |
| scheduler | scheduler name: 'constant', 'cosine_decay', 'step_decay', 'exponential_decay', 'polynomial_decay' or 'multi_step_decay'. | str | 'constant' |
| lr | learning rate value. | float | 0.01 |
| min_lr | lower lr bound for 'cosine_decay' schedulers. | float | 1e-06 |
| warmup_epochs | epochs to warm up the LR, if the scheduler supports it. | int | 3 |
| warmup_factor | the warmup phase is a linearly increasing lr: it starts at lr * warmup_factor in the first step/epoch and ends at lr. | float | 0.0 |
| decay_epochs | for 'cosine_decay' schedulers, decay the LR to min_lr within decay_epochs. For the 'step_decay' scheduler, decay the LR by a factor of decay_rate every decay_epochs. | int | 10 |
| decay_rate | LR decay rate. | float | 0.9 |
| milestones | list of epoch milestones for the 'multi_step_decay' scheduler. Must be increasing. | list | None |
| num_epochs | number of total epochs. | int | 200 |
| num_cycles | number of cycles for the cosine decay and cyclic schedulers. | int | 1 |
| cycle_decay | decay rate of the maximum lr in each cosine cycle. | float | 1.0 |
| lr_epoch_stair | if True, the LR is updated at the beginning of each epoch and stays constant for every batch within that epoch; otherwise, the learning rate is updated dynamically at each step. | bool | False |

Returns: Cell object for computing LR with input of current global steps.
+
+ +
+ Source code in mindcv/scheduler/scheduler_factory.py +
def create_scheduler(
+    steps_per_epoch: int,
+    scheduler: str = "constant",
+    lr: float = 0.01,
+    min_lr: float = 1e-6,
+    warmup_epochs: int = 3,
+    warmup_factor: float = 0.0,
+    decay_epochs: int = 10,
+    decay_rate: float = 0.9,
+    milestones: list = None,
+    num_epochs: int = 200,
+    num_cycles: int = 1,
+    cycle_decay: float = 1.0,
+    lr_epoch_stair: bool = False,
+):
+    r"""Creates learning rate scheduler by name.
+
+    Args:
+        steps_per_epoch: number of steps per epoch.
+        scheduler: scheduler name like 'constant', 'cosine_decay', 'step_decay',
+            'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.
+        lr: learning rate value. Default: 0.01.
+        min_lr: lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.
+        warmup_epochs: epochs to warmup LR, if scheduler supports. Default: 3.
+        warmup_factor: the warmup phase of scheduler is a linearly increasing lr,
+            the beginning factor is `warmup_factor`, i.e., the lr of the first step/epoch is lr*warmup_factor,
+            and the ending lr in the warmup phase is lr. Default: 0.0
+        decay_epochs: for 'cosine_decay' schedulers, decay LR to min_lr in `decay_epochs`.
+            For 'step_decay' scheduler, decay LR by a factor of `decay_rate` every `decay_epochs`. Default: 10.
+        decay_rate: LR decay rate. Default: 0.9.
+        milestones: list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None
+        num_epochs: Number of total epochs. Default: 200.
+        num_cycles: Number of cycles for cosine decay and cyclic. Default: 1.
+        cycle_decay: Decay rate of lr max in each cosine cycle. Default: 1.0.
+        lr_epoch_stair: If True, LR will be updated in the beginning of each new epoch
+            and the LR will be consistent for each batch in one epoch.
+            Otherwise, learning rate will be updated dynamically in each step. Default: False.
+    Returns:
+        Cell object for computing LR with input of current global steps
+    """
+    # check params
+    if milestones is None:
+        milestones = []
+
+    if warmup_epochs + decay_epochs > num_epochs:
+        _logger.warning("warmup_epochs + decay_epochs > num_epochs. Please check and reduce decay_epochs!")
+
+    # lr warmup phase
+    warmup_lr_scheduler = []
+    if warmup_epochs > 0:
+        if warmup_factor == 0 and lr_epoch_stair:
+            _logger.warning(
+                "The warmup factor is set to 0, so the lr of the 0-th epoch is always zero! Recommended value: 0.01."
+            )
+        warmup_func = linear_lr if lr_epoch_stair else linear_refined_lr
+        warmup_lr_scheduler = warmup_func(
+            start_factor=warmup_factor,
+            end_factor=1.0,
+            total_iters=warmup_epochs,
+            lr=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=warmup_epochs,
+        )
+
+    # lr decay phase
+    main_epochs = num_epochs - warmup_epochs
+    if scheduler in ["cosine_decay", "warmup_cosine_decay"]:
+        cosine_func = cosine_decay_lr if lr_epoch_stair else cosine_decay_refined_lr
+        main_lr_scheduler = cosine_func(
+            decay_epochs=decay_epochs,
+            eta_min=min_lr,
+            eta_max=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+            num_cycles=num_cycles,
+            cycle_decay=cycle_decay,
+        )
+    elif scheduler == "one_cycle":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError(
+                "OneCycle scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0."
+            )
+        div_factor = 25.0
+        initial_lr = lr / div_factor
+        final_div_factor = initial_lr / min_lr
+        main_lr_scheduler = one_cycle_lr(
+            max_lr=lr,
+            final_div_factor=final_div_factor,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "cyclic":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError("Cyclic scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0.")
+        num_steps = steps_per_epoch * main_epochs
+        step_size_up = int(num_steps / num_cycles / 2)
+        main_lr_scheduler = cyclic_lr(
+            base_lr=min_lr,
+            max_lr=lr,
+            step_size_up=step_size_up,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "exponential_decay":
+        exponential_func = exponential_lr if lr_epoch_stair else exponential_refined_lr
+        main_lr_scheduler = exponential_func(
+            gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "polynomial_decay":
+        polynomial_func = polynomial_lr if lr_epoch_stair else polynomial_refined_lr
+        main_lr_scheduler = polynomial_func(
+            total_iters=main_epochs, power=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "step_decay":
+        main_lr_scheduler = step_lr(
+            step_size=decay_epochs, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "multi_step_decay":
+        main_lr_scheduler = multi_step_lr(
+            milestones=milestones, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "constant":
+        main_lr_scheduler = [lr for _ in range(steps_per_epoch * main_epochs)]
+    else:
+        raise ValueError(f"Invalid scheduler: {scheduler}")
+
+    # combine
+    lr_scheduler = warmup_lr_scheduler + main_lr_scheduler
+
+    return lr_scheduler
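A minimal sketch of a typical call, assuming create_scheduler is importable from mindcv.scheduler.scheduler_factory as documented above; the epoch counts are illustrative:

```python
from mindcv.scheduler.scheduler_factory import create_scheduler

# 3 warmup epochs followed by cosine decay to min_lr over the remaining 97 epochs.
lr_scheduler = create_scheduler(
    steps_per_epoch=500,
    scheduler="cosine_decay",
    lr=0.1,
    min_lr=1e-6,
    warmup_epochs=3,
    decay_epochs=97,
    num_epochs=100,
)
print(len(lr_scheduler))  # 500 * 100 = 50000 per-step learning rate values
```

The returned list of per-step values can then be passed as the lr argument when creating the optimizer, since MindSpore optimizers accept an iterable of learning rates as a dynamic LR.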
+
+
+
+ +
+ +
+ + + +

+ mindcv.scheduler.dynamic_lr + + +

+ +
+ +

Meta learning rate scheduler.

+

This module implements exactly the same learning rate schedulers as native PyTorch; see "torch.optim.lr_scheduler" (https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate). At present, only constant_lr, linear_lr, polynomial_lr, exponential_lr, step_lr, multi_step_lr, cosine_annealing_lr, cosine_annealing_warm_restarts_lr, one_cycle_lr, and cyclic_lr are implemented. The number, names, and usage of the positional arguments are exactly the same as in native PyTorch.

However, because the learning rate must be explicitly returned for each step, three additional keyword arguments are introduced, namely lr, steps_per_epoch, and epochs:
lr: the base learning rate that would be passed when creating the optimizer in torch.
steps_per_epoch: the number of steps (iterations) per epoch.
epochs: the number of epochs; together with steps_per_epoch it determines the length of the returned list of lrs.

Among all the schedulers, one_cycle_lr and cyclic_lr need only the latter two keyword arguments (they do not take lr), since the lr argument of the torch optimizer has no effect when these two schedulers are used.

Since most schedulers in PyTorch are coarse-grained, i.e. the learning rate is constant within a single epoch, we also introduce fine-grained variants of the non-stepwise schedulers in which the learning rate changes within a single epoch. The function names of these variants carry the refined keyword, e.g. linear_refined_lr, polynomial_refined_lr, etc.
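A small illustration of how lr, steps_per_epoch and epochs shape the output, using one of the fine-grained variants (the call signature follows the usage shown in create_scheduler above; treat it as a sketch):

```python
from mindcv.scheduler.dynamic_lr import linear_refined_lr

# 2 epochs x 3 steps per epoch -> a plain Python list of 6 per-step learning rates,
# ramping linearly from 0.5 * lr toward 1.0 * lr across the 2 warmup epochs.
lrs = linear_refined_lr(start_factor=0.5, end_factor=1.0, total_iters=2,
                        lr=0.01, steps_per_epoch=3, epochs=2)
print(len(lrs))  # 6
```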

+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every epoch

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every epoch"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = math.floor(i / steps_per_epoch)
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
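A tiny worked example of the epoch-wise ("update every epoch") behaviour; the values follow directly from the code above:

```python
from mindcv.scheduler.dynamic_lr import cosine_decay_lr

# 3 epochs total, 4 steps per epoch, decay over the first 2 epochs.
lrs = cosine_decay_lr(decay_epochs=2, eta_min=0.0, eta_max=1.0,
                      steps_per_epoch=4, epochs=3, num_cycles=1, cycle_decay=1.0)
# Epoch-level staircase: all 4 steps of epoch 0 use 1.0, all 4 steps of epoch 1
# use 0.5, and the trailing 4 steps stay at eta_min = 0.0.
print(lrs)  # [1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
```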
+
+
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every step

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every step"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = i / steps_per_epoch
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
+
+
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cyclic_lr(base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', *, steps_per_epoch, epochs) + +

+ + +
+ +

Cyclic learning rate scheduler based on "Cyclical Learning Rates for Training Neural Networks" (https://arxiv.org/abs/1506.01186).

| Parameter | Description | Type | Default |
| --- | --- | --- | --- |
| base_lr | lower learning rate boundary in each cycle. | float | |
| max_lr | upper learning rate boundary in each cycle. | float | |
| step_size_up | number of steps in the increasing half of each cycle. | int | 2000 |
| step_size_down | number of steps in the decreasing half of each cycle. If None, it is set to step_size_up. | | None |
| mode | one of {'triangular', 'triangular2', 'exp_range'}; ignored if scale_fn is not None. | str | 'triangular' |
| gamma | constant in the 'exp_range' scaling function: gamma**(cycle_iterations). | | 1.0 |
| scale_fn | custom scaling policy defined by a single-argument lambda function. If it is not None, 'mode' is ignored. | | None |
| scale_mode | one of {'cycle', 'iterations'}; determines whether scale_fn is evaluated on the cycle number or on cycle iterations. | | 'cycle' |
| steps_per_epoch | number of steps per epoch. | int | |
| epochs | number of total epochs. | int | |
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cyclic_lr(
+    base_lr: float,
+    max_lr: float,
+    step_size_up: int = 2000,
+    step_size_down=None,
+    mode: str = "triangular",
+    gamma=1.0,
+    scale_fn=None,
+    scale_mode="cycle",
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    Cyclic learning rate scheduler based on
+    '"Cyclical Learning Rates for Training Neural Networks" <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        base_lr: Lower learning rate boundaries in each cycle.
+        max_lr: Upper learning rate boundaries in each cycle.
+        step_size_up: Number of steps in the increasing half in each cycle. Default: 2000.
+        step_size_down: Number of steps in the increasing half in each cycle. If step_size_down
+            is None, it's set to step_size_up. Default: None.
+        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.
+            Default: 25.0.
+        final_div_factor: Minimum learning rate at the end via
+            min_lr = initial_lr / final_div_factor. Default: 10000.0.
+        mode: One of {triangular, triangular2, exp_range}. If scale_fn is not None, it's set to
+            None. Default: 'triangular'.
+        gamma: Constant in 'exp_range' calculating fuction: gamma**(cycle_iterations).
+            Default: 1.0
+        scale_fn: Custom scaling policy defined by a single argument lambda function. If it's
+            not None, 'mode' is ignored. Default: None
+        scale_mode: One of {'cycle', 'iterations'}. Determines whether scale_fn is evaluated
+            on cycle number or on cycle iterations. Default: 'cycle'.
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _triangular_scale_fn(x):
+        return 1.0
+
+    def _triangular2_scale_fn(x):
+        return 1 / (2.0**(x - 1))
+
+    def _exp_range_scale_fn(x):
+        return gamma**x
+
+    steps = steps_per_epoch * epochs
+    step_size_up = float(step_size_up)
+    step_size_down = float(step_size_down) if step_size_down is not None else step_size_up
+    total_size = step_size_up + step_size_down
+    step_ratio = step_size_up / total_size
+    if scale_fn is None:
+        if mode == "triangular":
+            scale_fn = _triangular_scale_fn
+            scale_mode = "cycle"
+        elif mode == "triangular2":
+            scale_fn = _triangular2_scale_fn
+            scale_mode = "cycle"
+        elif mode == "exp_range":
+            scale_fn = _exp_range_scale_fn
+            scale_mode = "iterations"
+    lrs = []
+    for i in range(steps):
+        cycle = math.floor(1 + i / total_size)
+        x = 1.0 + i / total_size - cycle
+        if x <= step_ratio:
+            scale_factor = x / step_ratio
+        else:
+            scale_factor = (x - 1) / (step_ratio - 1)
+        base_height = (max_lr - base_lr) * scale_factor
+        if scale_mode == "cycle":
+            lrs.append(base_lr + base_height * scale_fn(cycle))
+        else:
+            lrs.append(base_lr + base_height * scale_fn(i))
+    return lrs
+
+
+
+ +
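For orientation, the sketch below shows one way the per-step learning-rate list returned by cyclic_lr can be consumed; the tiny placeholder network, the optimizer choice and the hyper-parameter values are illustrative assumptions rather than values taken from this documentation. MindSpore optimizers accept a list of per-step values as a dynamic learning rate.

```python
from mindspore import nn
from mindcv.scheduler.dynamic_lr import cyclic_lr

# one learning-rate value per training step: steps_per_epoch * epochs entries in total
lrs = cyclic_lr(
    base_lr=0.001,
    max_lr=0.1,
    step_size_up=500,
    mode="triangular2",
    steps_per_epoch=100,
    epochs=10,
)

network = nn.Dense(32, 10)  # placeholder network, for illustration only
# a list of per-step values is treated as a dynamic learning rate by MindSpore optimizers
optimizer = nn.Momentum(network.trainable_params(), learning_rate=lrs, momentum=0.9)
```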
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.one_cycle_lr(max_lr, pct_start=0.3, anneal_strategy='cos', div_factor=25.0, final_div_factor=10000.0, three_phase=False, *, steps_per_epoch, epochs) + +

+ + +
+ +

OneCycle learning rate scheduler based on +'"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates" +https://arxiv.org/abs/1708.07120'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
max_lr +
+

Upper learning rate boundaries in the cycle.

+
+

+ + TYPE: + float + +

+
pct_start +
+

The fraction of the total number of steps spent increasing the learning rate in the cycle. Default: 0.3.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.3 + +

+
anneal_strategy +
+

Define the annealing strategy: "cos" for cosine annealing, +"linear" for linear annealing. Default: "cos".

+
+

+ + TYPE: + str + + + DEFAULT: + 'cos' + +

+
div_factor +
+

Initial learning rate via initial_lr = max_lr / div_factor. +Default: 25.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 25.0 + +

+
final_div_factor +
+

Minimum learning rate at the end via +min_lr = initial_lr / final_div_factor. Default: 10000.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 10000.0 + +

+
three_phase +
+

If True, the learning rate is updated in three phases, where the final phase anneals it down to the minimum learning rate determined by final_div_factor. Otherwise, a two-phase schedule is used. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
steps_per_epoch +
+

Number of steps per epoch.

+
+

+ + TYPE: + int + +

+
epochs +
+

Number of total epochs.

+
+

+ + TYPE: + int + +

+
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
def one_cycle_lr(
+    max_lr: float,
+    pct_start: float = 0.3,
+    anneal_strategy: str = "cos",
+    div_factor: float = 25.0,
+    final_div_factor: float = 10000.0,
+    three_phase: bool = False,
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    OneCycle learning rate scheduler based on
+    '"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates"
+    <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        max_lr: Upper learning rate boundaries in the cycle.
+        pct_start: The fraction of the total number of steps spent increasing the learning
+            rate in the cycle. Default: 0.3.
+        anneal_strategy: Define the annealing strategy: "cos" for cosine annealing,
+            "linear" for linear annealing. Default: "cos".
+        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.
+            Default: 25.0.
+        final_div_factor: Minimum learning rate at the end via
+            min_lr = initial_lr / final_div_factor. Default: 10000.0.
+        three_phase: If True, the learning rate is updated in three phases, where the final
+            phase anneals it down to the minimum learning rate determined by final_div_factor.
+            Otherwise, a two-phase schedule is used. Default: False.
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _annealing_cos(start, end, pct):
+        cos_out = math.cos(math.pi * pct) + 1
+        return end + (start - end) / 2.0 * cos_out
+
+    def _annealing_linear(start, end, pct):
+        return (end - start) * pct + start
+
+    initial_lr = max_lr / div_factor
+    min_lr = initial_lr / final_div_factor
+    steps = steps_per_epoch * epochs
+    step_size_up = float(pct_start * steps) - 1
+    step_size_down = float(2 * pct_start * steps) - 2
+    step_size_end = float(steps) - 1
+    if anneal_strategy == "cos":
+        anneal_func = _annealing_cos
+    elif anneal_strategy == "linear":
+        anneal_func = _annealing_linear
+    else:
+        raise ValueError(f"anneal_strategy must be one of 'cos' or 'linear', but got {anneal_strategy}")
+    lrs = []
+    for i in range(steps):
+        if three_phase:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            elif step_size_up < i <= step_size_down:
+                lrs.append(anneal_func(max_lr, initial_lr, (i - step_size_up) / (step_size_down - step_size_up)))
+            else:
+                lrs.append(anneal_func(initial_lr, min_lr, (i - step_size_down) / (step_size_end - step_size_down)))
+        else:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            else:
+                lrs.append(anneal_func(max_lr, min_lr, (i - step_size_up) / (step_size_end - step_size_up)))
+    return lrs
+
+
+
+ +
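As with cyclic_lr above, here is a minimal, hypothetical sketch of feeding the one-cycle schedule to an optimizer; the placeholder network and the hyper-parameter values are assumptions chosen only for illustration.

```python
from mindspore import nn
from mindcv.scheduler.dynamic_lr import one_cycle_lr

# per-step learning rates: warm up from max_lr / div_factor to max_lr over the first 30% of
# steps, then anneal down towards initial_lr / final_div_factor with cosine annealing
lrs = one_cycle_lr(
    max_lr=0.1,
    pct_start=0.3,
    anneal_strategy="cos",
    div_factor=25.0,
    final_div_factor=10000.0,
    steps_per_epoch=100,
    epochs=10,
)

network = nn.Dense(32, 10)  # placeholder network, for illustration only
optimizer = nn.SGD(network.trainable_params(), learning_rate=lrs)
```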
+ + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/reference/utils/index.html b/reference/utils/index.html new file mode 100644 index 000000000..2436f4d81 --- /dev/null +++ b/reference/utils/index.html @@ -0,0 +1,3282 @@ + + + + + + + + + + + + + + + + + + + + + + + + utils - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Utility

+

Logger

+ + + +
+ + + +

+mindcv.utils.logger.set_logger(name=None, output_dir=None, rank=0, log_level=logging.INFO, color=True) + +

+ + +
+ +

Initialize the logger.

+

If the logger has not been initialized, this method will initialize it by adding one or two handlers; otherwise, the already-initialized logger is returned directly. During initialization, only the logger of the master process is given a console handler. If output_dir is specified, all loggers are given a file handler.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

Logger name. Defaults to None, which sets up the root logger.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
output_dir +
+

The directory in which to save log files.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
rank +
+

Process rank in the distributed training. Defaults to 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
log_level +
+

Verbosity level of the logger. Defaults to logging.INFO.

+
+

+ + TYPE: + int + + + DEFAULT: + logging.INFO + +

+
color +
+

If True, color the output. Defaults to True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + logging.Logger + + +
+

logging.Logger: An initialized logger.

+
+
+ +
+ Source code in mindcv/utils/logger.py +
 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
def set_logger(
+    name: Optional[str] = None,
+    output_dir: Optional[str] = None,
+    rank: int = 0,
+    log_level: int = logging.INFO,
+    color: bool = True,
+) -> logging.Logger:
+    """Initialize the logger.
+
+    If the logger has not been initialized, this method will initialize it by
+    adding one or two handlers; otherwise, the already-initialized logger is
+    returned directly. During initialization, only the logger of the master
+    process is given a console handler. If ``output_dir`` is specified, all
+    loggers are given a file handler.
+
+    Args:
+        name: Logger name. Defaults to None, which sets up the root logger.
+        output_dir: The directory in which to save log files.
+        rank: Process rank in the distributed training. Defaults to 0.
+        log_level: Verbosity level of the logger. Defaults to ``logging.INFO``.
+        color: If True, color the output. Defaults to True.
+
+    Returns:
+        logging.Logger: An initialized logger.
+    """
+    if name in logger_initialized:
+        return logger_initialized[name]
+
+    # get root logger if name is None
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level)
+    # the messages of this logger will not be propagated to its parent
+    logger.propagate = False
+
+    fmt = "%(asctime)s %(name)s %(levelname)s - %(message)s"
+    datefmt = "[%Y-%m-%d %H:%M:%S]"
+
+    # create console handler for master process
+    if rank == 0:
+        if color:
+            if has_rich:
+                console_handler = RichHandler(level=log_level, log_time_format=datefmt)
+            elif has_termcolor:
+                console_handler = logging.StreamHandler(stream=sys.stdout)
+                console_handler.setLevel(log_level)
+                console_handler.setFormatter(_ColorfulFormatter(fmt=fmt, datefmt=datefmt))
+            else:
+                raise NotImplementedError("If you want color, 'rich' or 'termcolor' has to be installed!")
+        else:
+            console_handler = logging.StreamHandler(stream=sys.stdout)
+            console_handler.setLevel(log_level)
+            console_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(console_handler)
+
+    if output_dir is not None:
+        os.makedirs(output_dir, exist_ok=True)
+        file_handler = logging.FileHandler(os.path.join(output_dir, f"rank{rank}.log"))
+        file_handler.setLevel(log_level)
+        file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(file_handler)
+
+    logger_initialized[name] = logger
+    return logger
+
+
+
+ +
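A short usage sketch follows. The module path mindcv.utils.logger comes from the source listing above; the argument values are illustrative, and color=False is used so that neither rich nor termcolor is required.

```python
import logging
from mindcv.utils.logger import set_logger

# rank 0 gets a console handler; every rank writes ./ckpt/rank{rank}.log when output_dir is set
logger = set_logger(name="mindcv", output_dir="./ckpt", rank=0, log_level=logging.INFO, color=False)
logger.info("logger initialized")
```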

Callbacks

+ + +
+ + + +

+ mindcv.utils.callbacks.StateMonitor + + +

+ + +
+

+ Bases: Callback

+ + +

Train loss and validation accuracy monitor; after each epoch, it saves the best checkpoint file, i.e. the one with the highest validation accuracy.

+ +
+ Source code in mindcv/utils/callbacks.py +
 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
class StateMonitor(Callback):
+    """
+    Train loss and validation accuracy monitor; after each epoch, save the
+    best checkpoint file, i.e. the one with the highest validation accuracy.
+    """
+
+    def __init__(
+        self,
+        model,
+        model_name="",
+        model_ema=False,
+        last_epoch=0,
+        dataset_sink_mode=True,
+        dataset_val=None,
+        metric_name=("accuracy",),
+        val_interval=1,
+        val_start_epoch=1,
+        save_best_ckpt=True,
+        ckpt_save_dir="./",
+        ckpt_save_interval=1,
+        ckpt_save_policy=None,
+        ckpt_keep_max=10,
+        summary_dir="./",
+        log_interval=100,
+        rank_id=None,
+        device_num=None,
+    ):
+        super().__init__()
+        # model
+        self.model = model
+        self.model_name = model_name
+        self.model_ema = model_ema
+        self.last_epoch = last_epoch
+        self.dataset_sink_mode = dataset_sink_mode
+        # evaluation
+        self.dataset_val = dataset_val
+        self.metric_name = metric_name
+        self.val_interval = val_interval
+        self.val_start_epoch = val_start_epoch
+        # logging
+        self.best_res = 0
+        self.best_epoch = -1
+        self.save_best_ckpt = save_best_ckpt
+        self.ckpt_save_dir = ckpt_save_dir
+        self.ckpt_save_interval = ckpt_save_interval
+        self.ckpt_save_policy = ckpt_save_policy
+        self.ckpt_keep_max = ckpt_keep_max
+        self.ckpt_manager = CheckpointManager(ckpt_save_policy=self.ckpt_save_policy)
+        self._need_flush_from_cache = True
+        self.summary_dir = summary_dir
+        self.log_interval = log_interval
+        # system
+        self.rank_id = rank_id if rank_id is not None else 0
+        self.device_num = device_num if rank_id is not None else 1
+        if self.rank_id in [0, None]:
+            os.makedirs(ckpt_save_dir, exist_ok=True)
+            self.log_file = os.path.join(ckpt_save_dir, "result.log")
+            log_line = "".join(
+                f"{s:<20}" for s in ["Epoch", "TrainLoss", *metric_name, "TrainTime", "EvalTime", "TotalTime"]
+            )
+            with open(self.log_file, "w", encoding="utf-8") as fp:  # writing the title of result.log
+                fp.write(log_line + "\n")
+        if self.device_num > 1:
+            self.all_reduce = AllReduceSum()
+        # timestamp
+        self.step_ts = None
+        self.epoch_ts = None
+        self.step_time_accum = 0
+        # model_ema
+        if self.model_ema:
+            self.hyper_map = ops.HyperMap()
+            self.online_params = ParameterTuple(self.model.train_network.get_parameters())
+            self.swap_params = self.online_params.clone("swap", "zeros")
+
+    def __enter__(self):
+        self.summary_record = SummaryRecord(self.summary_dir)
+        return self
+
+    def __exit__(self, *exc_args):
+        self.summary_record.close()
+
+    def apply_eval(self, run_context):
+        """Model evaluation, return validation accuracy."""
+        if self.model_ema:
+            cb_params = run_context.original_args()
+            self.hyper_map(ops.assign, self.swap_params, self.online_params)
+            ema_dict = dict()
+            net = self._get_network_from_cbp(cb_params)
+            for param in net.get_parameters():
+                if param.name.startswith("ema"):
+                    new_name = param.name.split("ema.")[1]
+                    ema_dict[new_name] = param.data
+            load_param_into_net(self.model.train_network.network, ema_dict)
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+            self.hyper_map(ops.assign, self.online_params, self.swap_params)
+        else:
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+        if self.device_num > 1:
+            res_array = self.all_reduce(res_array)
+            res_array /= self.device_num
+        res_array = res_array.asnumpy()
+        return res_array
+
+    def on_train_step_begin(self, run_context):
+        self.step_ts = time()
+
+    def on_train_epoch_begin(self, run_context):
+        self.epoch_ts = time()
+
+    def on_train_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        # num_steps = num_batches * num_epochs
+        # cur_x start from 1, end at num_xs, range: [1, num_xs]
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        self.step_time_accum += time() - self.step_ts
+        if cur_batch % self.log_interval == 0 or cur_batch == num_batches or cur_batch == 1:
+            lr = self._get_lr_from_cbp(cb_params)
+            loss = self._get_loss_from_cbp(cb_params)
+            _logger.info(
+                f"Epoch: [{cur_epoch}/{num_epochs}], "
+                f"batch: [{cur_batch}/{num_batches}], "
+                f"loss: {loss.asnumpy():.6f}, "
+                f"lr: {lr.asnumpy():.6f}, "
+                f"time: {self.step_time_accum:.6f}s"
+            )
+            self.step_time_accum = 0
+
+    def on_train_epoch_end(self, run_context):
+        """
+        After each epoch, print the train loss and val accuracy, and save the
+        best ckpt file, i.e. the one with the highest validation accuracy.
+        """
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        train_time = time() - self.epoch_ts
+        loss = self._get_loss_from_cbp(cb_params)
+
+        val_time = 0
+        res = np.zeros(len(self.metric_name), dtype=np.float32)
+        # val while training if validation loader is not None
+        if (
+            self.dataset_val is not None
+            and cur_epoch >= self.val_start_epoch
+            and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+        ):
+            val_time = time()
+            res = self.apply_eval(run_context)
+            val_time = time() - val_time
+            # record val acc
+            metric_str = "Validation "
+            for i in range(len(self.metric_name)):
+                metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+            metric_str += f"time: {val_time:.6f}s"
+            _logger.info(metric_str)
+            # save the best ckpt file
+            if res[0] > self.best_res:
+                self.best_res = res[0]
+                self.best_epoch = cur_epoch
+                _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+        # save checkpoint
+        if self.rank_id in [0, None]:
+            if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+                best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+                save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+            if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+                if self._need_flush_from_cache:
+                    self._flush_from_cache(cb_params)
+                # save optim for resume
+                optimizer = self._get_optimizer_from_cbp(cb_params)
+                optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+                save_checkpoint(optimizer, optim_save_path, async_save=True)
+                # keep checkpoint files number equal max number.
+                ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+                _logger.info(f"Saving model to {ckpt_save_path}")
+                self.ckpt_manager.save_ckpoint(
+                    cb_params.train_network,
+                    num_ckpt=self.ckpt_keep_max,
+                    metric=res[0],
+                    save_path=ckpt_save_path,
+                )
+
+        # logging
+        total_time = time() - self.epoch_ts
+        _logger.info(
+            f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+            f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+        )
+        _logger.info("-" * 80)
+        if self.rank_id in [0, None]:
+            log_line = "".join(
+                f"{s:<20}"
+                for s in [
+                    f"{cur_epoch}",
+                    f"{loss.asnumpy():.6f}",
+                    *[f"{i:.4%}" for i in res],
+                    f"{train_time:.2f}",
+                    f"{val_time:.2f}",
+                    f"{total_time:.2f}",
+                ]
+            )
+            with open(self.log_file, "a", encoding="utf-8") as fp:
+                fp.write(log_line + "\n")
+
+        # summary
+        self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+        for i in range(len(res)):
+            self.summary_record.add_value(
+                "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+            )
+        self.summary_record.record(cur_step)
+
+    def on_train_end(self, run_context):
+        _logger.info("Finish training!")
+        if self.dataset_val is not None:
+            _logger.info(
+                f"The best validation {self.metric_name[0]} is: {self.best_res:.4%} at epoch {self.best_epoch}."
+            )
+        _logger.info("=" * 80)
+
+    def _get_network_from_cbp(self, cb_params):
+        if self.dataset_sink_mode:
+            network = cb_params.train_network.network
+        else:
+            network = cb_params.train_network
+        return network
+
+    def _get_optimizer_from_cbp(self, cb_params):
+        if cb_params.optimizer is not None:
+            optimizer = cb_params.optimizer
+        elif self.dataset_sink_mode:
+            optimizer = cb_params.train_network.network.optimizer
+        else:
+            optimizer = cb_params.train_network.optimizer
+        return optimizer
+
+    def _get_lr_from_cbp(self, cb_params):
+        optimizer = self._get_optimizer_from_cbp(cb_params)
+        if optimizer.global_step < 1:
+            _logger.warning(
+                "`global_step` of optimizer is less than 1. It seems to be a overflow at the first step. "
+                "If you keep seeing this message, it means that the optimizer never actually called."
+            )
+            optim_step = Tensor((0,), ms.int32)
+        else:  # if the optimizer is successfully called, the global_step will actually be the value of next step.
+            optim_step = optimizer.global_step - 1
+        if optimizer.dynamic_lr:
+            lr = optimizer.learning_rate(optim_step)[0]
+        else:
+            lr = optimizer.learning_rate
+        return lr
+
+    def _get_loss_from_cbp(self, cb_params):
+        """
+        Get loss from the network output.
+        Args:
+            cb_params (_InternalCallbackParam): Callback parameters.
+        Returns:
+            Union[Tensor, None], if parse loss success, will return a Tensor value(shape is [1]), else return None.
+        """
+        output = cb_params.net_outputs
+        if output is None:
+            _logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
+            return None
+
+        if isinstance(output, (int, float, Tensor)):
+            loss = output
+        elif isinstance(output, (list, tuple)) and output:
+            # If the output is a list, since the default network returns loss first,
+            # we assume that the first one is loss.
+            loss = output[0]
+        else:
+            _logger.warning(
+                "The output type could not be identified, expect type is one of "
+                "[int, float, Tensor, list, tuple], so no loss was recorded in SummaryCollector."
+            )
+            return None
+
+        if not isinstance(loss, Tensor):
+            loss = Tensor(loss)
+
+        loss = Tensor(np.mean(loss.asnumpy()))
+        return loss
+
+    def _flush_from_cache(self, cb_params):
+        """Flush cache data to host if tensor is cache enable."""
+        has_cache_params = False
+        params = cb_params.train_network.get_parameters()
+        for param in params:
+            if param.cache_enable:
+                has_cache_params = True
+                Tensor(param).flush_from_cache()
+        if not has_cache_params:
+            self._need_flush_from_cache = False
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.apply_eval(run_context) + +

+ + +
+ +

Model evaluation, return validation accuracy.

+ +
+ Source code in mindcv/utils/callbacks.py +
104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
def apply_eval(self, run_context):
+    """Model evaluation, return validation accuracy."""
+    if self.model_ema:
+        cb_params = run_context.original_args()
+        self.hyper_map(ops.assign, self.swap_params, self.online_params)
+        ema_dict = dict()
+        net = self._get_network_from_cbp(cb_params)
+        for param in net.get_parameters():
+            if param.name.startswith("ema"):
+                new_name = param.name.split("ema.")[1]
+                ema_dict[new_name] = param.data
+        load_param_into_net(self.model.train_network.network, ema_dict)
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        self.hyper_map(ops.assign, self.online_params, self.swap_params)
+    else:
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+    res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+    if self.device_num > 1:
+        res_array = self.all_reduce(res_array)
+        res_array /= self.device_num
+    res_array = res_array.asnumpy()
+    return res_array
+
+
+
+ +
+ + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.on_train_epoch_end(run_context) + +

+ + +
+ +

After each epoch, print the train loss and val accuracy, and save the best ckpt file, i.e. the one with the highest validation accuracy.

+ +
+ Source code in mindcv/utils/callbacks.py +
156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
def on_train_epoch_end(self, run_context):
+    """
+    After each epoch, print the train loss and val accuracy, and save the
+    best ckpt file, i.e. the one with the highest validation accuracy.
+    """
+    cb_params = run_context.original_args()
+    num_epochs = cb_params.epoch_num
+    num_batches = cb_params.batch_num
+    cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+    cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+    cur_batch = (cur_step - 1) % num_batches + 1
+
+    train_time = time() - self.epoch_ts
+    loss = self._get_loss_from_cbp(cb_params)
+
+    val_time = 0
+    res = np.zeros(len(self.metric_name), dtype=np.float32)
+    # val while training if validation loader is not None
+    if (
+        self.dataset_val is not None
+        and cur_epoch >= self.val_start_epoch
+        and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+    ):
+        val_time = time()
+        res = self.apply_eval(run_context)
+        val_time = time() - val_time
+        # record val acc
+        metric_str = "Validation "
+        for i in range(len(self.metric_name)):
+            metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+        metric_str += f"time: {val_time:.6f}s"
+        _logger.info(metric_str)
+        # save the best ckpt file
+        if res[0] > self.best_res:
+            self.best_res = res[0]
+            self.best_epoch = cur_epoch
+            _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+    # save checkpoint
+    if self.rank_id in [0, None]:
+        if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+            best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+            save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+        if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+            if self._need_flush_from_cache:
+                self._flush_from_cache(cb_params)
+            # save optim for resume
+            optimizer = self._get_optimizer_from_cbp(cb_params)
+            optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+            save_checkpoint(optimizer, optim_save_path, async_save=True)
+            # keep checkpoint files number equal max number.
+            ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+            _logger.info(f"Saving model to {ckpt_save_path}")
+            self.ckpt_manager.save_ckpoint(
+                cb_params.train_network,
+                num_ckpt=self.ckpt_keep_max,
+                metric=res[0],
+                save_path=ckpt_save_path,
+            )
+
+    # logging
+    total_time = time() - self.epoch_ts
+    _logger.info(
+        f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+        f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+    )
+    _logger.info("-" * 80)
+    if self.rank_id in [0, None]:
+        log_line = "".join(
+            f"{s:<20}"
+            for s in [
+                f"{cur_epoch}",
+                f"{loss.asnumpy():.6f}",
+                *[f"{i:.4%}" for i in res],
+                f"{train_time:.2f}",
+                f"{val_time:.2f}",
+                f"{total_time:.2f}",
+            ]
+        )
+        with open(self.log_file, "a", encoding="utf-8") as fp:
+            fp.write(log_line + "\n")
+
+    # summary
+    self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+    for i in range(len(res)):
+        self.summary_record.add_value(
+            "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+        )
+    self.summary_record.record(cur_step)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
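To show how StateMonitor is intended to be plugged in, here is a hypothetical sketch; model, dataset_train and dataset_val are placeholders assumed to be created elsewhere, and the keyword values are illustrative rather than recommended settings.

```python
from mindcv.utils.callbacks import StateMonitor

# `model`, `dataset_train` and `dataset_val` are assumed to be built beforehand
state_cb = StateMonitor(
    model,
    model_name="resnet50",
    dataset_val=dataset_val,
    metric_name=("accuracy",),
    ckpt_save_dir="./ckpt",
    log_interval=100,
)
# the callback is entered as a context manager by MindSpore, which opens its SummaryRecord
model.train(10, dataset_train, callbacks=[state_cb], dataset_sink_mode=True)
```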
+ + + +

+ mindcv.utils.callbacks.ValCallback + + +

+ + +
+

+ Bases: Callback

+ + +
+ Source code in mindcv/utils/callbacks.py +
330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
class ValCallback(Callback):
+    def __init__(self, log_interval=100):
+        super().__init__()
+        self.log_interval = log_interval
+        self.ts = time()
+
+    def on_eval_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num
+
+        if cur_step % self.log_interval == 0 or cur_step == num_batches:
+            print(f"batch: {cur_step}/{num_batches}, time: {time() - self.ts:.6f}s")
+            self.ts = time()
+
+
+ + + +
+ + + + + + + + + + + +
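ValCallback only prints evaluation progress; a minimal, hypothetical usage sketch (model and dataset_val are placeholders) could look like this:

```python
from mindcv.utils.callbacks import ValCallback

# `model` and `dataset_val` are assumed to exist; log progress every 50 evaluation batches
metrics = model.eval(dataset_val, callbacks=[ValCallback(log_interval=50)], dataset_sink_mode=False)
```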
+ +
+ +

Train Step

+ + +
+ + + +

+ mindcv.utils.train_step.TrainStep + + +

+ + +
+

+ Bases: nn.TrainOneStepWithLossScaleCell

+ + +

Training step with loss scale.

+ +
+ The customized TrainOneStepCell also supports the following algorithms: +
    +
  • Exponential Moving Average (EMA)
  • +
  • Gradient Clipping
  • +
  • Gradient Accumulation
  • +
+
+
+ Source code in mindcv/utils/train_step.py +
 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
class TrainStep(nn.TrainOneStepWithLossScaleCell):
+    """Training step with loss scale.
+
+    The customized TrainOneStepCell also supports the following algorithms:
+        * Exponential Moving Average (EMA)
+        * Gradient Clipping
+        * Gradient Accumulation
+    """
+
+    def __init__(
+        self,
+        network,
+        optimizer,
+        scale_sense=1.0,
+        ema=False,
+        ema_decay=0.9999,
+        clip_grad=False,
+        clip_value=15.0,
+        gradient_accumulation_steps=1,
+    ):
+        super(TrainStep, self).__init__(network, optimizer, scale_sense)
+        self.ema = ema
+        self.ema_decay = ema_decay
+        self.updates = Parameter(Tensor(0.0, ms.float32))
+        self.clip_grad = clip_grad
+        self.clip_value = clip_value
+        if self.ema:
+            self.weights_all = ms.ParameterTuple(list(network.get_parameters()))
+            self.ema_weight = self.weights_all.clone("ema", init="same")
+
+        self.accumulate_grad = gradient_accumulation_steps > 1
+        if self.accumulate_grad:
+            self.gradient_accumulation = GradientAccumulation(gradient_accumulation_steps, optimizer, self.grad_reducer)
+
+    def ema_update(self):
+        self.updates += 1
+        # ema factor is corrected by (1 - exp(-t/T)), where `t` means time and `T` means temperature.
+        ema_decay = self.ema_decay * (1 - F.exp(-self.updates / 2000))
+        # update trainable parameters
+        success = self.hyper_map(F.partial(_ema_op, ema_decay), self.ema_weight, self.weights_all)
+        return success
+
+    def construct(self, *inputs):
+        weights = self.weights
+        loss = self.network(*inputs)
+        scaling_sens = self.scale_sense
+
+        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)
+
+        scaling_sens_filled = ops.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
+        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
+        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
+
+        # todo: When to clip grad? Do we need to clip grad after grad reduction? What if grad accumulation is needed?
+        if self.clip_grad:
+            grads = ops.clip_by_global_norm(grads, clip_norm=self.clip_value)
+
+        if self.loss_scaling_manager:  # scale_sense = update_cell: Cell --> TrainOneStepWithLossScaleCell.construct
+            if self.accumulate_grad:
+                # todo: GradientAccumulation only call grad_reducer at the step where the accumulation is completed.
+                #  So checking the overflow status is after gradient reduction, is this correct?
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = self.gradient_accumulation(loss, grads)
+            else:
+                # apply grad reducer on grads
+                grads = self.grad_reducer(grads)
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = F.depend(loss, self.optimizer(grads))
+        else:  # scale_sense = loss_scale: Tensor --> TrainOneStepCell.construct
+            if self.accumulate_grad:
+                loss = self.gradient_accumulation(loss, grads)
+            else:
+                grads = self.grad_reducer(grads)
+                loss = F.depend(loss, self.optimizer(grads))
+
+        if self.ema:
+            loss = F.depend(loss, self.ema_update())
+
+        return loss
+
+
+ + + +
+ + + + + + + + + + + +
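TrainStep is normally assembled inside create_trainer (documented below), but the hypothetical sketch here shows roughly how it could be wired up by hand; the placeholder network, loss and loss-scale value are assumptions for illustration only.

```python
import mindspore as ms
from mindspore import Tensor, nn
from mindcv.utils.train_step import TrainStep

network = nn.Dense(32, 10)  # placeholder backbone, for illustration only
net_with_loss = nn.WithLossCell(network, nn.CrossEntropyLoss())
optimizer = nn.Momentum(network.trainable_params(), learning_rate=0.01, momentum=0.9)

# a fixed loss scale passed as a Tensor; EMA and gradient clipping are optional extras
train_step = TrainStep(
    net_with_loss,
    optimizer,
    scale_sense=Tensor(1024.0, ms.float32),
    ema=True,
    clip_grad=True,
    clip_value=15.0,
).set_train()
```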
+ +
+ +

Trainer Factory

+ + + +
+ + + +

+mindcv.utils.trainer_factory.create_trainer(network, loss, optimizer, metrics, amp_level, amp_cast_list, loss_scale_type, loss_scale=1.0, drop_overflow_update=False, ema=False, ema_decay=0.9999, clip_grad=False, clip_value=15.0, gradient_accumulation_steps=1) + +

+ + +
+ +

Create Trainer.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
network +
+

The backbone network to train, evaluate or predict.

+
+

+ + TYPE: + nn.Cell + +

+
loss +
+

The function of calculating loss.

+
+

+ + TYPE: + nn.Cell + +

+
optimizer +
+

The optimizer for training.

+
+

+ + TYPE: + nn.Cell + +

+
metrics +
+

The metrics for model evaluation.

+
+

+ + TYPE: + Union[dict, set] + +

+
amp_level +
+

The level of auto mixed precision training.

+
+

+ + TYPE: + str + +

+
amp_cast_list +
+

A custom list of cell types to cast to FP16 at the cell level.

+
+

+ + TYPE: + str + +

+
loss_scale_type +
+

The type of loss scale.

+
+

+ + TYPE: + str + +

+
loss_scale +
+

The value of loss scale.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
drop_overflow_update +
+

Whether to execute optimizer if there is an overflow.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
ema +
+

Whether to use exponential moving average of model weights.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
ema_decay +
+

Decay factor for model weights moving average.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.9999 + +

+
clip_grad +
+

Whether to clip gradients.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
clip_value +
+

The value at which to clip gradients.

+
+

+ + TYPE: + float + + + DEFAULT: + 15.0 + +

+
gradient_accumulation_steps +
+

Accumulate the gradients of n batches before update.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

mindspore.Model

+
+
+ +
+ Source code in mindcv/utils/trainer_factory.py +
 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
def create_trainer(
+    network: nn.Cell,
+    loss: nn.Cell,
+    optimizer: nn.Cell,
+    metrics: Union[dict, set],
+    amp_level: str,
+    amp_cast_list: str,
+    loss_scale_type: str,
+    loss_scale: float = 1.0,
+    drop_overflow_update: bool = False,
+    ema: bool = False,
+    ema_decay: float = 0.9999,
+    clip_grad: bool = False,
+    clip_value: float = 15.0,
+    gradient_accumulation_steps: int = 1,
+):
+    """Create Trainer.
+
+    Args:
+        network: The backbone network to train, evaluate or predict.
+        loss: The function of calculating loss.
+        optimizer: The optimizer for training.
+        metrics: The metrics for model evaluation.
+        amp_level: The level of auto mixed precision training.
+        amp_cast_list: A custom list of cell types to cast to FP16 at the cell level.
+        loss_scale_type: The type of loss scale.
+        loss_scale: The value of loss scale.
+        drop_overflow_update: Whether to execute optimizer if there is an overflow.
+        ema: Whether to use exponential moving average of model weights.
+        ema_decay: Decay factor for model weights moving average.
+        clip_grad: Whether to clip gradients.
+        clip_value: The value at which to clip gradients.
+        gradient_accumulation_steps: Accumulate the gradients of n batches before update.
+
+    Returns:
+        mindspore.Model
+
+    """
+    if loss_scale < 1.0:
+        raise ValueError("Loss scale cannot be less than 1.0!")
+
+    if drop_overflow_update is False and loss_scale_type.lower() == "dynamic":
+        raise ValueError("DynamicLossScale ALWAYS drop overflow!")
+
+    if gradient_accumulation_steps < 1:
+        raise ValueError("`gradient_accumulation_steps` must be >= 1!")
+
+    if not require_customized_train_step(ema, clip_grad, gradient_accumulation_steps, amp_cast_list):
+        mindspore_kwargs = dict(
+            network=network,
+            loss_fn=loss,
+            optimizer=optimizer,
+            metrics=metrics,
+            amp_level=amp_level,
+        )
+        if loss_scale_type.lower() == "fixed":
+            mindspore_kwargs["loss_scale_manager"] = FixedLossScaleManager(
+                loss_scale=loss_scale, drop_overflow_update=drop_overflow_update
+            )
+        elif loss_scale_type.lower() == "dynamic":
+            mindspore_kwargs["loss_scale_manager"] = DynamicLossScaleManager(
+                init_loss_scale=loss_scale, scale_factor=2, scale_window=2000
+            )
+        elif loss_scale_type.lower() == "auto":
+            # We don't explicitly construct LossScaleManager
+            _logger.warning(
+                "You are using AUTO loss scale, which means the LossScaleManager isn't explicitly pass in "
+                "when creating a mindspore.Model instance. "
+                "NOTE: mindspore.Model may use LossScaleManager silently. See mindspore.train.amp for details."
+            )
+        else:
+            raise ValueError(f"Loss scale type only support ['fixed', 'dynamic', 'auto'], but got{loss_scale_type}.")
+        model = Model(**mindspore_kwargs)
+    else:  # require customized train step
+        eval_network = nn.WithEvalCell(network, loss, amp_level in ["O2", "O3", "auto"])
+        auto_mixed_precision(network, amp_level, amp_cast_list)
+        net_with_loss = add_loss_network(network, loss, amp_level)
+        train_step_kwargs = dict(
+            network=net_with_loss,
+            optimizer=optimizer,
+            ema=ema,
+            ema_decay=ema_decay,
+            clip_grad=clip_grad,
+            clip_value=clip_value,
+            gradient_accumulation_steps=gradient_accumulation_steps,
+        )
+        if loss_scale_type.lower() == "fixed":
+            loss_scale_manager = FixedLossScaleManager(loss_scale=loss_scale, drop_overflow_update=drop_overflow_update)
+        elif loss_scale_type.lower() == "dynamic":
+            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=loss_scale, scale_factor=2, scale_window=2000)
+        else:
+            raise ValueError(f"Loss scale type only support ['fixed', 'dynamic'], but got{loss_scale_type}.")
+        update_cell = loss_scale_manager.get_update_cell()
+        # 1. loss_scale_type="fixed", drop_overflow_update=False
+        # --> update_cell=None, TrainStep=TrainOneStepCell(scale_sense=loss_scale)
+        # 2. loss_scale_type: fixed, drop_overflow_update: True
+        # --> update_cell=FixedLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        # 3. loss_scale_type: dynamic, drop_overflow_update: True
+        # --> update_cell=DynamicLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        if update_cell is None:
+            train_step_kwargs["scale_sense"] = Tensor(loss_scale, dtype=ms.float32)
+        else:
+            if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU":
+                raise ValueError(
+                    "Only `loss_scale_type` is `fixed` and `drop_overflow_update` is `False`"
+                    "are supported on device `CPU`."
+                )
+            train_step_kwargs["scale_sense"] = update_cell
+        train_step_cell = TrainStep(**train_step_kwargs).set_train()
+        model = Model(train_step_cell, eval_network=eval_network, metrics=metrics, eval_indexes=[0, 1, 2])
+        # todo: do we need to set model._loss_scale_manager
+    return model
+
+
+
+ +
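Finally, a hypothetical end-to-end sketch of using create_trainer; network, loss, optimizer and dataset_train are placeholders assumed to be built beforehand (for example with MindCV's create_model and create_dataset factories shown elsewhere in these docs), and treating amp_cast_list=None as meaning no custom cast list is an assumption, not a documented contract.

```python
from mindcv.utils.trainer_factory import create_trainer

trainer = create_trainer(
    network,                 # assumed nn.Cell backbone built elsewhere
    loss,                    # assumed loss cell built elsewhere
    optimizer,               # assumed optimizer built elsewhere
    metrics={"accuracy"},
    amp_level="O2",
    amp_cast_list=None,      # assumption: None means no extra per-cell FP16 cast list
    loss_scale_type="fixed",
    loss_scale=1024.0,
    drop_overflow_update=False,
)
trainer.train(10, dataset_train, callbacks=[], dataset_sink_mode=True)
```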
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/search/search_index.json b/search/search_index.json new file mode 100644 index 000000000..ab15f6314 --- /dev/null +++ b/search/search_index.json @@ -0,0 +1 @@ +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#mindcv","title":"MindCV","text":""},{"location":"#introduction","title":"Introduction","text":"

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pre-trained weights and training strategies. SoTA methods such as auto augmentation are also provided for performance improvement. With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks.

"},{"location":"#major-features","title":"Major Features","text":"
  • Easy-to-Use. MindCV decomposes the vision framework into various configurable components. It is easy to customize your data pipeline, models, and learning pipeline with MindCV:

    >>> import mindcv\n# create a dataset\n>>> dataset = mindcv.create_dataset('cifar10', download=True)\n# create a model\n>>> network = mindcv.create_model('resnet50', pretrained=True)\n

    Users can customize and launch their transfer learning or training task in one command line.

    # transfer learning in one command line\npython train.py --model=swin_tiny --pretrained --opt=adamw --lr=0.001 --data_dir=/path/to/data\n
  • State-of-The-Art. MindCV provides various CNN-based and Transformer-based vision models including SwinTransformer. Their pretrained weights and performance reports are provided to help users select and reuse the right model:

  • Flexibility and efficiency. MindCV is built on MindSpore which is an efficient DL framework that can be run on different hardware platforms (GPU/CPU/Ascend). It supports both graph mode for high efficiency and pynative mode for flexibility.

"},{"location":"#model-zoo","title":"Model Zoo","text":"

The performance of the models trained with MindCV is summarized in here, where the training recipes and weights are both available.

Model introduction and training details can be viewed in each sub-folder under configs.

"},{"location":"#installation","title":"Installation","text":"

See Installation for details.

"},{"location":"#getting-started","title":"Getting Started","text":""},{"location":"#hands-on-tutorial","title":"Hands-on Tutorial","text":"

To get started with MindCV, please see the Quick Start, which will give you a quick tour of each key component and the train/validate/predict pipelines.

Below are a few code snippets for your taste.

>>> import mindcv\n# List and find a pretrained vision model\n>>> mindcv.list_models(\"swin*\", pretrained=True)\n['swin_tiny']\n# Create the model object\n>>> network = mindcv.create_model('swin_tiny', pretrained=True)\n# Validate its accuracy\n>>> !python validate.py --model=swin_tiny --pretrained --dataset=imagenet --val_split=validation\n{'Top_1_Accuracy': 0.80824, 'Top_5_Accuracy': 0.94802, 'loss': 1.7331367141008378}\n
Image Classification Demo

Right click on the image below and save as dog.jpg.

Classify the downloaded image with a pretrained SoTA model:

>>> !python infer.py --model=swin_tiny --image_path='./dog.jpg'\n{'Labrador retriever': 0.5700152, 'golden retriever': 0.034551315, 'kelpie': 0.010108651, 'Chesapeake Bay retriever': 0.008229004, 'Walker hound, Walker foxhound': 0.007791956}\n

The top-1 prediction result is labrador retriever, which is the breed of this cute dog.

"},{"location":"#training","title":"Training","text":"

It is easy to train your model on a standard or customized dataset using train.py, where the training strategy (e.g., augmentation, LR scheduling) can be configured with external arguments or a yaml config file.

  • Standalone Training

    # standalone training\npython train.py --model=resnet50 --dataset=cifar10 --dataset_download\n

Above is an example of training ResNet50 on the CIFAR10 dataset on a CPU/GPU/Ascend device.

  • Distributed Training

    For large datasets like ImageNet, it is necessary to do training in distributed mode on multiple devices. This can be achieved with mpirun and parallel features supported by MindSpore.

    # distributed training\n# assume you have 4 GPUs/NPUs\nmpirun -n 4 python train.py --distribute \\\n--model=densenet121 --dataset=imagenet --data_dir=/path/to/imagenet\n

    Notes: If the script is executed by the root user, the --allow-run-as-root parameter must be added to mpirun.

Detailed parameter definitions can be seen in config.py and checked by running `python train.py --help`.

    To resume training, please set the --ckpt_path and --ckpt_save_dir arguments. The optimizer state including the learning rate of the last stopped epoch will also be recovered.

  • Config and Training Strategy

    You can configure your model and other components either by specifying external parameters or by writing a yaml config file. Here is an example of training using a preset yaml file.

    mpirun --allow-run-as-root -n 4 python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml\n

    Pre-defined Training Strategies

    We provide more than 20 training recipes that achieve SoTA results on ImageNet currently. Please look into the configs folder for details. Please feel free to adapt these training strategies to your own model for performance improvement, which can be easily done by modifying the yaml file.

  • Train on ModelArts/OpenI Platform

    To run training on the ModelArts or OpenI cloud platform:

    1. Create a new training task on the cloud platform.\n2. Add the parameter `config` and specify the path to the yaml config file on the website UI interface.\n3. Add the parameter `enable_modelarts` and set True on the website UI interface.\n4. Fill in other blanks on the website and launch the training task.\n

Graph Mode and PyNative Mode

By default, the training pipeline train.py is run in graph mode on MindSpore, which is optimized for efficiency and parallel computing with a compiled static graph. In contrast, pynative mode is optimized for flexibility and easy debugging. You may alter the parameter --mode to switch to pure pynative mode for debugging purposes.

Mixed Mode

PyNative mode with mindspore.jit is a mixed mode that combines flexibility and efficiency in MindSpore. To apply pynative mode with mindspore.jit for training, please run train_with_func.py, e.g.,

python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download  --epoch_size=10\n

Note: this is an experimental function under improvement. It is not stable on MindSpore 1.8.1 or earlier versions.

"},{"location":"#validation","title":"Validation","text":"

To evaluate the model performance, please run validate.py

# validate a trained checkpoint\npython validate.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/data --ckpt_path=/path/to/model.ckpt\n

Validation while Training

You can also track the validation accuracy during training by enabling the --val_while_train option.

python train.py --model=resnet50 --dataset=cifar10 \\\n--val_while_train --val_split=test --val_interval=1\n

The training loss and validation accuracy for each epoch will be saved in ${ckpt_save_dir}/results.log.

More examples about training and validation can be seen in examples.

"},{"location":"#tutorials","title":"Tutorials","text":"

We provide the following jupyter notebook tutorials to help users learn to use MindCV.

  • Learn about configs
  • Inference with a pretrained model
  • Finetune a pretrained model on custom datasets
  • Customize your model //coming soon
  • Optimizing performance for vision transformer //coming soon
  • Deployment demo
"},{"location":"#supported-algorithms","title":"Supported Algorithms","text":"Supported algorithms
  • Augmentation
    • AutoAugment
    • RandAugment
    • Repeated Augmentation
    • RandErasing (Cutout)
    • CutMix
    • MixUp
    • RandomResizeCrop
    • Color Jitter, Flip, etc
  • Optimizer
    • Adam
    • AdamW
    • Lion
    • Adan (experimental)
    • AdaGrad
    • LAMB
    • Momentum
    • RMSProp
    • SGD
    • NAdam
  • LR Scheduler
    • Warmup Cosine Decay
    • Step LR
    • Polynomial Decay
    • Exponential Decay
  • Regularization
    • Weight Decay
    • Label Smoothing
    • Stochastic Depth (depends on networks)
    • Dropout (depends on networks)
  • Loss
    • Cross Entropy (w/ class weight and auxiliary logit support)
    • Binary Cross Entropy (w/ class weight and auxiliary logit support)
    • Soft Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • Soft Binary Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
  • Ensemble
    • Warmup EMA (Exponential Moving Average)
"},{"location":"#how-to-contribute","title":"How to Contribute","text":"

We appreciate all kinds of contributions including issues and PRs to make MindCV better.

Please refer to CONTRIBUTING for the contributing guideline. Please follow the Model Template and Guideline for contributing a model that fits the overall interface :)

"},{"location":"#license","title":"License","text":"

This project follows the Apache License 2.0 open-source license.

"},{"location":"#acknowledgement","title":"Acknowledgement","text":"

MindCV is an open-source project jointly developed by the MindSpore team, Xidian University, and Xi'an Jiaotong University. Sincere thanks to all participating researchers and developers for their hard work on this project. We also acknowledge the computing resources provided by OpenI.

"},{"location":"#citation","title":"Citation","text":"

If you find this project useful in your research, please consider citing:

@misc{MindSpore Computer Vision 2022,\n    title={{MindSpore Computer  Vision}:MindSpore Computer Vision Toolbox and Benchmark},\n    author={MindSpore Vision Contributors},\n    howpublished = {\\url{https://github.com/mindspore-lab/mindcv/}},\n    year={2022}\n}\n
"},{"location":"installation/","title":"Installation","text":""},{"location":"installation/#dependency","title":"Dependency","text":"
  • mindspore >= 1.8.1
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • tqdm
  • openmpi 4.0.3 (for distributed mode)

To install the python library dependency, just run:

pip install -r requirements.txt\n

Tip

MindSpore can be easily installed by following the official instructions where you can select your hardware platform for the best fit. To run in distributed mode, OpenMPI needs to be installed.

The following instructions assume the desired dependency is fulfilled.

"},{"location":"installation/#install-with-pypi","title":"Install with PyPI","text":"

MindCV is published as a Python package and can be installed with pip, ideally by using a virtual environment. Open up a terminal and install MindCV with:

stablenightly
pip install mindcv\n
# working on it using test.pypi\n

This will automatically install compatible versions of dependencies: NumPy, PyYAML and tqdm.

Tip

If you don't have prior experience with Python, we recommend reading Using Python's pip to Manage Your Projects' Dependencies, which is a really good introduction to the mechanics of Python package management and helps you troubleshoot if you run into errors.

Warning

The above command will NOT install MindSpore. We highly recommend you install MindSpore following the official instructions.

"},{"location":"installation/#install-from-source-bleeding-edge-version","title":"Install from Source (Bleeding Edge Version)","text":""},{"location":"installation/#from-vcs","title":"from VCS","text":"
pip install git+https://github.com/mindspore-lab/mindcv.git\n
"},{"location":"installation/#from-local-src","title":"from local src","text":"

Tip

Since this project is under active development, developers and contributors should prefer this installation method!

MindCV can be used directly from GitHub by cloning the repository into a local folder, which might be useful if you want to use the very latest version:

git clone https://github.com/mindspore-lab/mindcv.git\n

After cloning from git, it is recommended that you install using \"editable\" mode, which can help resolve potential module import issues:

cd mindcv\npip install -e .\n
"},{"location":"modelzoo/","title":"Model Zoo","text":"Model Context Top-1 (%) Top-5 (%) Params(M) Recipe Download BiT_resnet50 D910x8-G 76.81 93.17 25.55 yaml weights BiT_resnet50x3 D910x8-G 80.63 95.12 217.31 yaml weights BiT_resnet101 D910x8-G 77.93 93.75 44.54 yaml weights coat_lite_tiny D910x8-G 77.35 93.43 5.72 yaml weights coat_lite_mini D910x8-G 78.51 93.84 11.01 yaml weights coat_tiny D910x8-G 79.67 94.88 5.50 yaml weights coat_mini D910x8-G 81.08 95.34 10.34 yaml weights convit_tiny D910x8-G 73.66 91.72 5.71 yaml weights convit_tiny_plus D910x8-G 77.00 93.60 9.97 yaml weights convit_small D910x8-G 81.63 95.59 27.78 yaml weights convit_small_plus D910x8-G 81.80 95.42 48.98 yaml weights convit_base D910x8-G 82.10 95.52 86.54 yaml weights convit_base_plus D910x8-G 81.96 95.04 153.13 yaml weights convnext_tiny D910x64-G 81.91 95.79 28.59 yaml weights convnext_small D910x64-G 83.40 96.36 50.22 yaml weights convnext_base D910x64-G 83.32 96.24 88.59 yaml weights convnextv2_tiny D910x8-G 82.43 95.98 28.64 yaml weights crossvit_9 D910x8-G 73.56 91.79 8.55 yaml weights crossvit_15 D910x8-G 81.08 95.33 27.27 yaml weights crossvit_18 D910x8-G 81.93 95.75 43.27 yaml weights densenet121 D910x8-G 75.64 92.84 8.06 yaml weights densenet161 D910x8-G 79.09 94.66 28.90 yaml weights densenet169 D910x8-G 77.26 93.71 14.31 yaml weights densenet201 D910x8-G 78.14 94.08 20.24 yaml weights dpn92 D910x8-G 79.46 94.49 37.79 yaml weights dpn98 D910x8-G 79.94 94.57 61.74 yaml weights dpn107 D910x8-G 80.05 94.74 87.13 yaml weights dpn131 D910x8-G 80.07 94.72 79.48 yaml weights edgenext_xx_small D910x8-G 71.02 89.99 1.33 yaml weights edgenext_x_small D910x8-G 75.14 92.50 2.34 yaml weights edgenext_small D910x8-G 79.15 94.39 5.59 yaml weights edgenext_base D910x8-G 82.24 95.94 18.51 yaml weights efficientnet_b0 D910x64-G 76.89 93.16 5.33 yaml weights efficientnet_b1 D910x64-G 78.95 94.34 7.86 yaml weights ghostnet_050 D910x8-G 66.03 86.64 2.60 yaml weights ghostnet_100 D910x8-G 73.78 91.66 5.20 yaml weights ghostnet_130 D910x8-G 75.50 92.56 7.39 yaml weights googlenet D910x8-G 72.68 90.89 6.99 yaml weights hrnet_w32 D910x8-G 80.64 95.44 41.30 yaml weights hrnet_w48 D910x8-G 81.19 95.69 77.57 yaml weights inception_v3 D910x8-G 79.11 94.40 27.20 yaml weights inception_v4 D910x8-G 80.88 95.34 42.74 yaml weights mixnet_s D910x8-G 75.52 92.52 4.17 yaml weights mixnet_m D910x8-G 76.64 93.05 5.06 yaml weights mixnet_l D910x8-G 78.73 94.31 7.38 yaml weights mnasnet_050 D910x8-G 68.07 88.09 2.14 yaml weights mnasnet_075 D910x8-G 71.81 90.53 3.20 yaml weights mnasnet_100 D910x8-G 74.28 91.70 4.42 yaml weights mnasnet_130 D910x8-G 75.65 92.64 6.33 yaml weights mnasnet_140 D910x8-G 76.01 92.83 7.16 yaml weights mobilenet_v1_025 D910x8-G 53.87 77.66 0.47 yaml weights mobilenet_v1_050 D910x8-G 65.94 86.51 1.34 yaml weights mobilenet_v1_075 D910x8-G 70.44 89.49 2.60 yaml weights mobilenet_v1_100 D910x8-G 72.95 91.01 4.25 yaml weights mobilenet_v2_075 D910x8-G 69.98 89.32 2.66 yaml weights mobilenet_v2_100 D910x8-G 72.27 90.72 3.54 yaml weights mobilenet_v2_140 D910x8-G 75.56 92.56 6.15 yaml weights mobilenet_v3_small_100 D910x8-G 68.10 87.86 2.55 yaml weights mobilenet_v3_large_100 D910x8-G 75.23 92.31 5.51 yaml weights mobilevit_xx_small D910x8-G 68.91 88.91 1.27 yaml weights mobilevit_x_small D910x8-G 74.99 92.32 2.32 yaml weights mobilevit_small D910x8-G 78.47 94.18 5.59 yaml weights nasnet_a_4x1056 D910x8-G 73.65 91.25 5.33 yaml weights pit_ti D910x8-G 72.96 91.33 4.85 yaml weights pit_xs D910x8-G 78.41 94.06 10.61 yaml 
weights pit_s D910x8-G 80.56 94.80 23.46 yaml weights pit_b D910x8-G 81.87 95.04 73.76 yaml weights poolformer_s12 D910x8-G 77.33 93.34 11.92 yaml weights pvt_tiny D910x8-G 74.81 92.18 13.23 yaml weights pvt_small D910x8-G 79.66 94.71 24.49 yaml weights pvt_medium D910x8-G 81.82 95.81 44.21 yaml weights pvt_large D910x8-G 81.75 95.70 61.36 yaml weights pvt_v2_b0 D910x8-G 71.50 90.60 3.67 yaml weights pvt_v2_b1 D910x8-G 78.91 94.49 14.01 yaml weights pvt_v2_b2 D910x8-G 81.99 95.74 25.35 yaml weights pvt_v2_b3 D910x8-G 82.84 96.24 45.24 yaml weights pvt_v2_b4 D910x8-G 83.14 96.27 62.56 yaml weights regnet_x_200mf D910x8-G 68.74 88.38 2.68 yaml weights regnet_x_400mf D910x8-G 73.16 91.35 5.16 yaml weights regnet_x_600mf D910x8-G 74.34 92.00 6.20 yaml weights regnet_x_800mf D910x8-G 76.04 92.97 7.26 yaml weights regnet_y_200mf D910x8-G 70.30 89.61 3.16 yaml weights regnet_y_400mf D910x8-G 73.91 91.84 4.34 yaml weights regnet_y_600mf D910x8-G 75.69 92.50 6.06 yaml weights regnet_y_800mf D910x8-G 76.52 93.10 6.26 yaml weights regnet_y_16gf D910x8-G 82.92 96.29 83.71 yaml weights repmlp_t224 D910x8-G 76.71 93.30 38.30 yaml weights repvgg_a0 D910x8-G 72.19 90.75 9.13 yaml weights repvgg_a1 D910x8-G 74.19 91.89 14.12 yaml weights repvgg_a2 D910x8-G 76.63 93.42 28.25 yaml weights repvgg_b0 D910x8-G 74.99 92.40 15.85 yaml weights repvgg_b1 D910x8-G 78.81 94.37 57.48 yaml weights repvgg_b2 D910x64-G 79.29 94.66 89.11 yaml weights repvgg_b3 D910x64-G 80.46 95.34 123.19 yaml weights repvgg_b1g2 D910x8-G 78.03 94.09 45.85 yaml weights repvgg_b1g4 D910x8-G 77.64 94.03 40.03 yaml weights repvgg_b2g4 D910x8-G 78.8 94.36 61.84 yaml weights res2net50 D910x8-G 79.35 94.64 25.76 yaml weights res2net101 D910x8-G 79.56 94.70 45.33 yaml weights res2net50_v1b D910x8-G 80.32 95.09 25.77 yaml weights res2net101_v1b D910x8-G 81.14 95.41 45.35 yaml weights resnest50 D910x8-G 80.81 95.16 27.55 yaml weights resnest101 D910x8-G 82.90 96.12 48.41 yaml weights resnet18 D910x8-G 70.21 89.62 11.70 yaml weights resnet34 D910x8-G 74.15 91.98 21.81 yaml weights resnet50 D910x8-G 76.69 93.50 25.61 yaml weights resnet101 D910x8-G 78.24 94.09 44.65 yaml weights resnet152 D910x8-G 78.72 94.45 60.34 yaml weights resnetv2_50 D910x8-G 76.90 93.37 25.60 yaml weights resnetv2_101 D910x8-G 78.48 94.23 44.55 yaml weights resnext50_32x4d D910x8-G 78.53 94.10 25.10 yaml weights resnext101_32x4d D910x8-G 79.83 94.80 44.32 yaml weights resnext101_64x4d D910x8-G 80.30 94.82 83.66 yaml weights resnext152_64x4d D910x8-G 80.52 95.00 115.27 yaml weights rexnet_09 D910x8-G 77.06 93.41 4.13 yaml weights rexnet_10 D910x8-G 77.38 93.60 4.84 yaml weights rexnet_13 D910x8-G 79.06 94.28 7.61 yaml weights rexnet_15 D910x8-G 79.95 94.74 9.79 yaml weights rexnet_20 D910x8-G 80.64 94.99 16.45 yaml weights seresnet18 D910x8-G 71.81 90.49 11.80 yaml weights seresnet34 D910x8-G 75.38 92.50 21.98 yaml weights seresnet50 D910x8-G 78.32 94.07 28.14 yaml weights seresnext26_32x4d D910x8-G 77.17 93.42 16.83 yaml weights seresnext50_32x4d D910x8-G 78.71 94.36 27.63 yaml weights shufflenet_v1_g3_05 D910x8-G 57.05 79.73 0.73 yaml weights shufflenet_v1_g3_10 D910x8-G 67.77 87.73 1.89 yaml weights shufflenet_v2_x0_5 D910x8-G 60.53 82.11 1.37 yaml weights shufflenet_v2_x1_0 D910x8-G 69.47 88.88 2.29 yaml weights shufflenet_v2_x1_5 D910x8-G 72.79 90.93 3.53 yaml weights shufflenet_v2_x2_0 D910x8-G 75.07 92.08 7.44 yaml weights skresnet18 D910x8-G 73.09 91.20 11.97 yaml weights skresnet34 D910x8-G 76.71 93.10 22.31 yaml weights skresnext50_32x4d D910x8-G 79.08 94.60 37.31 
yaml weights squeezenet1_0 D910x8-G 59.01 81.01 1.25 yaml weights squeezenet1_0 GPUx8-G 58.83 81.08 1.25 yaml weights squeezenet1_1 D910x8-G 58.44 80.84 1.24 yaml weights squeezenet1_1 GPUx8-G 59.18 81.41 1.24 yaml weights swin_tiny D910x8-G 80.82 94.80 33.38 yaml weights swinv2_tiny_window8 D910x8-G 81.42 95.43 28.78 yaml weights vgg11 D910x8-G 71.86 90.50 132.86 yaml weights vgg13 D910x8-G 72.87 91.02 133.04 yaml weights vgg16 D910x8-G 74.61 91.87 138.35 yaml weights vgg19 D910x8-G 75.21 92.56 143.66 yaml weights visformer_tiny D910x8-G 78.28 94.15 10.33 yaml weights visformer_tiny_v2 D910x8-G 78.82 94.41 9.38 yaml weights visformer_small D910x8-G 81.76 95.88 40.25 yaml weights visformer_small_v2 D910x8-G 82.17 95.90 23.52 yaml weights vit_b_32_224 D910x8-G 75.86 92.08 87.46 yaml weights vit_l_16_224 D910x8-G 76.34 92.79 303.31 yaml weights vit_l_32_224 D910x8-G 73.71 90.92 305.52 yaml weights volo_d1 D910x8-G 82.59 95.99 27 yaml weights xception D910x8-G 79.01 94.25 22.91 yaml weights xcit_tiny_12_p16_224 D910x8-G 77.67 93.79 7.00 yaml weights"},{"location":"modelzoo/#notes","title":"Notes","text":"
  • Context: Training context, denoted as {device}x{pieces}-{MS mode}, where the MindSpore mode can be G (graph mode) or F (pynative mode with ms function). For example, D910x8-G means training on 8 Ascend 910 NPUs in graph mode.
  • Top-1 and Top-5: Accuracy reported on the validation set of ImageNet-1K.
"},{"location":"how_to_guides/write_a_new_model/","title":"Write A New Model","text":"

This document provides a reference template for writing the model definition file model.py in MindSpore, aiming at a unified code style.

Next, let's take MLP-Mixer as an example.

"},{"location":"how_to_guides/write_a_new_model/#file-header","title":"File Header","text":"

Start the file with a brief description that includes the model name and the paper title, as follows:

\"\"\"\nMindSpore implementation of `${MODEL_NAME}`.\nRefer to ${PAPER_NAME}.\n\"\"\"\n
"},{"location":"how_to_guides/write_a_new_model/#module-import","title":"Module Import","text":"

There are three types of module imports, listed in order:

  • Python standard-library or third-party libraries, e.g. import math and import numpy as np. These go in the first group.
  • MindSpore-related modules, e.g. import mindspore.nn as nn and import mindspore.ops as ops. These go in the second group.
  • Modules in the MindCV package, e.g. from .layers.classifier import ClassifierHead. These go in the third group and should use relative imports.

Examples are as follows:

import math\nfrom collections import OrderedDict\n\nimport mindspore.nn as nn\nimport mindspore.ops as ops\nimport mindspore.common.initializer as init\n\nfrom .utils import load_pretrained\nfrom .layers.classifier import ClassifierHead\n

Import only the modules or packages that are actually needed; avoid unused imports.

"},{"location":"how_to_guides/write_a_new_model/#__all__","title":"__all__","text":"

Python has no native visibility control; visibility is maintained by a set of \"conventions\" that everyone should consciously follow. __all__ is such a convention for exposing a module's interface: it provides a \"whitelist\" of public names. If __all__ is defined and another file imports this module with from xxx import *, only the members listed in __all__ are imported; all other members are excluded.

By convention, the interfaces exposed by a model file are the main model class and the functions that return models of different specifications, for example:

__all__ = [\n    \"MLPMixer\",\n    \"mlp_mixer_s_p32\",\n    \"mlp_mixer_s_p16\",\n    ...\n]\n

Here MLPMixer is the main model class, and mlp_mixer_s_p32 and mlp_mixer_s_p16 are functions that return models of different specifications. Generally speaking, a submodel, that is, a Layer or a Block, should not be shared by other files. If it does need to be shared, consider extracting it into ${MINDCLS}/models/layers as a common module, such as SEBlock.

"},{"location":"how_to_guides/write_a_new_model/#submodel","title":"Submodel","text":"

A deep model is a network composed of multiple layers. Some of these layers can be grouped into sub-models with the same topology, which we generally call a Layer or a Block, such as ResidualBlock. This kind of abstraction helps us understand the overall model structure and also makes the code easier to write.

We should briefly describe the function of the sub-model in its class docstring. In MindSpore, a model class inherits from nn.Cell. Generally speaking, we need to override the following two functions:

  • In the __init__ function, we define the neural network layers used by the model (the parameters of __init__ should be declared with type hints).
  • In the construct function, we define the model's forward logic.

Examples are as follows:

class MixerBlock(nn.Cell):\n\"\"\"Mixer Layer with token-mixing MLP and channel-mixing MLP\"\"\"\n\n    def __init__(self,\n                 n_patches: int,\n                 n_channels: int,\n                 token_dim: int,\n                 channel_dim: int,\n                 dropout: float = 0.\n                 ) -> None:\n        super().__init__()\n        self.token_mix = nn.SequentialCell(\n            nn.LayerNorm((n_channels,)),\n            TransPose((0, 2, 1)),\n            FeedForward(n_patches, token_dim, dropout),\n            TransPose((0, 2, 1))\n        )\n        self.channel_mix = nn.SequentialCell(\n            nn.LayerNorm((n_channels,)),\n            FeedForward(n_channels, channel_dim, dropout),\n        )\n\n    def construct(self, x):\n        x = x + self.token_mix(x)\n        x = x + self.channel_mix(x)\n        return x\n

When writing an nn.Cell class, there are two noteworthy aspects:

  • CellList & SequentialCell

  • CellList is simply a container that holds a list of neural network layers (Cell). The Cells it contains are properly registered and are visible to all Cell methods, but we must still write the forward computation ourselves, i.e., in the construct function.

  • SequentialCell is a container that holds a sequential list of layers (Cell). The Cells may be named (passed as an OrderedDict) or unnamed (passed as a list). We don't need to implement the forward computation; it is performed automatically in the order of the list.

  • construct

  • Assert is not supported. [RuntimeError: ParseStatement] Unsupported statement 'Assert'.

  • Usage of single operators. When calling an operator (such as concat, reshape, or mean), use the functional interface in mindspore.ops (e.g., output = ops.concat((x1, x2))) instead of instantiating the primitive operator in __init__ (e.g., self.concat = ops.Concat()) and then calling it in construct (output = self.concat((x1, x2))). Short sketches illustrating both points follow this list.
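The two sketches below are illustrative only: the block names and shapes are hypothetical and are not taken from the MindCV source. The first contrasts CellList, where construct must chain the cells manually, with SequentialCell, which runs its cells in list order automatically:

import mindspore.nn as nn\n\nclass StackedConv(nn.Cell):\n    \"\"\"Hypothetical block stacking a few conv layers.\"\"\"\n\n    def __init__(self, channels: int, depth: int) -> None:\n        super().__init__()\n        # CellList: the cells are registered, but we must loop over them ourselves.\n        self.layers = nn.CellList([nn.Conv2d(channels, channels, 3) for _ in range(depth)])\n        # SequentialCell: forward computation follows the list order automatically.\n        self.fused = nn.SequentialCell(nn.Conv2d(channels, channels, 3), nn.ReLU())\n\n    def construct(self, x):\n        for layer in self.layers:\n            x = layer(x)\n        return self.fused(x)\n

The second sketch shows the preferred functional-operator style in construct:

import mindspore.nn as nn\nimport mindspore.ops as ops\n\nclass ConcatBlock(nn.Cell):\n    \"\"\"Hypothetical block: concatenates two branches along the channel axis.\"\"\"\n\n    def construct(self, x1, x2):\n        # Preferred: call the functional interface directly,\n        # instead of instantiating ops.Concat() in __init__ and calling it here.\n        return ops.concat((x1, x2), axis=1)\n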

"},{"location":"how_to_guides/write_a_new_model/#master-model","title":"Master Model","text":"

The master model is the network proposed in the paper, composed of multiple sub-models. It is the top-level network used for classification, detection, and other tasks. Writing it is largely similar to writing a submodel, with several differences:

  • Class docstring. Give the title and a link to the paper here. In addition, since this class is exposed externally, it is best to also describe the class initialization parameters. See the code below.
  • forward_features function. Defines the feature-extraction part of the network.
  • forward_head function. Defines the classifier (head) of the model.
  • construct function. Calls the feature network and then the classifier.
  • _initialize_weights function. By convention, the random initialization of model parameters is done in this member function. See the code below.

Examples are as follows:

class MLPMixer(nn.Cell):\nr\"\"\"MLP-Mixer model class, based on\n    `\"MLP-Mixer: An all-MLP Architecture for Vision\" <https://arxiv.org/abs/2105.01601>`_\n\n    Args:\n        depth (int) : number of MixerBlocks.\n        patch_size (Union[int, tuple]) : size of a single image patch.\n        n_patches (int) : number of patches.\n        n_channels (int) : channels(dimension) of a single embedded patch.\n        token_dim (int) : hidden dim of token-mixing MLP.\n        channel_dim (int) : hidden dim of channel-mixing MLP.\n        in_channels(int): number the channels of the input. Default: 3.\n        n_classes (int) : number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(self,\n                 depth: int,\n                 patch_size: Union[int, tuple],\n                 n_patches: int,\n                 n_channels: int,\n                 token_dim: int,\n                 channel_dim: int,\n                 in_channels: int = 3,\n                 n_classes: int = 1000,\n                 ) -> None:\n        super().__init__()\n        self.n_patches = n_patches\n        self.n_channels = n_channels\n        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.\n        self.to_patch_embedding = nn.SequentialCell(\n            nn.Conv2d(in_chans, n_channels, patch_size, patch_size, pad_mode=\"pad\", padding=0),\n            TransPose(permutation=(0, 2, 1), embedding=True),\n        )\n        self.mixer_blocks = nn.SequentialCell()\n        for _ in range(depth):\n            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))\n        self.layer_norm = nn.LayerNorm((n_channels,))\n        self.mlp_head = nn.Dense(n_channels, n_classes)\n        self._initialize_weights()\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.to_patch_embedding(x)\n        x = self.mixer_blocks(x)\n        x = self.layer_norm(x)\n        return ops.mean(x, 1)\n\n    def forward_head(self, x: Tensor)-> Tensor:\n        return self.mlp_head(x)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        return self.forward_head(x)\n\n    def _initialize_weights(self) -> None:\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Conv2d):\n                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):\n                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))\n                if m.beta is not None:\n                    m.beta.set_data(init.initializer(init.Constant(0.0001), m.beta.shape))\n            elif isinstance(m, nn.Dense):\n                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n
"},{"location":"how_to_guides/write_a_new_model/#specification-function","title":"Specification Function","text":"

The model proposed in the paper may come in different specifications, such as different channel widths or depths. The concrete configuration of each variant should be expressed through a specification function. The interface parameters pretrained, num_classes, and in_channels should be named uniformly across specification functions, and loading of pretrained weights should be performed inside the specification function. Each specification function corresponds to one variant: it passes the variant's configuration to the main model class through its arguments and returns the instantiated model. In addition, register each specification of the model in the package by adding the @register_model decorator.

Examples are as follows:

@register_model\ndef mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,\n                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n\n@register_model\ndef mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,\n                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"how_to_guides/write_a_new_model/#verify-main-optional","title":"Verify Main (Optional)","text":"

During the initial writing phase, make sure the model actually runs. The following code block can be used for basic verification:

if __name__ == '__main__':\n    import numpy as np\n    import mindspore\n    from mindspore import Tensor\n\n    model = mlp_mixer_s_p16()\n    print(model)\n    dummy_input = Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)\n    y = model(dummy_input)\n    print(y.shape)\n
"},{"location":"how_to_guides/write_a_new_model/#reference-example","title":"Reference Example","text":"
  • densenet.py
  • shufflenetv1.py
  • shufflenetv2.py
  • mixnet.py
  • mlp_mixer.py
"},{"location":"notes/changelog/","title":"Change Log","text":"

Coming soon.

"},{"location":"notes/code_of_conduct/","title":"Code of Conduct","text":"

Coming soon.

"},{"location":"notes/contributing/","title":"Contributing","text":""},{"location":"notes/contributing/#mindcv-contributing-guidelines","title":"MindCV Contributing Guidelines","text":"

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

"},{"location":"notes/contributing/#contributor-license-agreement","title":"Contributor License Agreement","text":"

You are required to sign the CLA before your first code submission to the MindCV community.

For individual contributors, please refer to the ICLA online document for detailed information.

"},{"location":"notes/contributing/#types-of-contributions","title":"Types of Contributions","text":""},{"location":"notes/contributing/#report-bugs","title":"Report Bugs","text":"

Report bugs at https://github.com/mindspore-lab/mindcv/issues.

If you are reporting a bug, please include:

  • Your operating system name and version.
  • Any details about your local setup that might be helpful in troubleshooting.
  • Detailed steps to reproduce the bug.
"},{"location":"notes/contributing/#fix-bugs","title":"Fix Bugs","text":"

Look through the GitHub issues for bugs. Anything tagged with \"bug\" and \"help wanted\" is open to whoever wants to implement it.

"},{"location":"notes/contributing/#implement-features","title":"Implement Features","text":"

Look through the GitHub issues for features. Anything tagged with \"enhancement\" and \"help wanted\" is open to whoever wants to implement it.

"},{"location":"notes/contributing/#write-documentation","title":"Write Documentation","text":"

MindCV could always use more documentation, whether as part of the official MindCV docs, in docstrings, or even on the web in blog posts, articles, and such.

"},{"location":"notes/contributing/#submit-feedback","title":"Submit Feedback","text":"

The best way to send feedback is to file an issue at https://github.com/mindspore-lab/mindcv/issues.

If you are proposing a feature:

  • Explain in detail how it would work.
  • Keep the scope as narrow as possible, to make it easier to implement.
  • Remember that this is a volunteer-driven project, and that contributions are welcome :)
"},{"location":"notes/contributing/#getting-started","title":"Getting Started","text":"

Ready to contribute? Here's how to set up mindcv for local development.

  1. Fork the mindcv repo on GitHub.
  2. Clone your fork locally:
git clone git@github.com:your_name_here/mindcv.git\n

After that, you should add the official repository as the upstream remote:

git remote add upstream git@github.com:mindspore-lab/mindcv\n
  1. Install your local copy into a conda environment. Assuming you have conda installed, this is how you set up your fork for local development:
conda create -n mindcv python=3.8\nconda activate mindcv\ncd mindcv\npip install -e .\n
  1. Create a branch for local development:
git checkout -b name-of-your-bugfix-or-feature\n

Now you can make your changes locally.

  1. When you're done making changes, check that your changes pass the linters and the tests:
pre-commit run --show-diff-on-failure --color=always --all-files\npytest\n

If all the static linting checks pass, you will see output indicating success.

Otherwise, you need to fix the reported warnings according to the output.

To get pre-commit and pytest, just pip install them into your conda environment.

  1. Commit your changes and push your branch to GitHub:
git add .\ngit commit -m \"Your detailed description of your changes.\"\ngit push origin name-of-your-bugfix-or-feature\n
  1. Submit a pull request through the GitHub website.
"},{"location":"notes/contributing/#pull-request-guidelines","title":"Pull Request Guidelines","text":"

Before you submit a pull request, check that it meets these guidelines:

  1. The pull request should include tests.
  2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.md.
  3. The pull request should work for Python 3.7, 3.8 and 3.9, and for PyPy. Check https://github.com/mindspore-lab/mindcv/actions and make sure that the tests pass for all supported Python versions.
"},{"location":"notes/contributing/#tips","title":"Tips","text":"

You can install the git hook scripts instead of running pre-commit run -a manually for linting.

Run the following command to set up the git hook scripts:

pre-commit install\n

Now pre-commit will run automatically on git commit!

"},{"location":"notes/contributing/#releasing","title":"Releasing","text":"

A reminder for the maintainers on how to deploy. Make sure all your changes are committed (including an entry in HISTORY.md). Then run:

bump2version patch # possible: major / minor / patch\ngit push\ngit push --tags\n

GitHub Action will then deploy to PyPI if tests pass.

"},{"location":"notes/faq/","title":"FAQ","text":"

Coming soon.

"},{"location":"reference/data/","title":"Data","text":""},{"location":"reference/data/#auto-augmentation","title":"Auto Augmentation","text":""},{"location":"reference/data/#mindcv.data.auto_augment.auto_augment_transform","title":"mindcv.data.auto_augment.auto_augment_transform(configs, hparams)","text":"

Create an AutoAugment transform

PARAMETER DESCRIPTION configs

A string that defines the automatic augmentation configuration. It is composed of multiple parts separated by dashes (\"-\"). The first part defines the AutoAugment policy ('autoaug', 'autoaugr' or '3a': 'autoaug' for the original AutoAugment policy with PosterizeOriginal, 'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation, '3a' for the AutoAugment only with 3 augmentations.) There is no order requirement for the remaining config parts.

  • mstd: Float standard deviation of applied magnitude noise.

Example: 'autoaug-mstd0.5' will apply automatic augmentation using the autoaug strategy with magnitude_std 0.5.

hparams

Other hparams of the automatic augmentation scheme.

Source code in mindcv/data/auto_augment.py
def auto_augment_transform(configs, hparams):\n\"\"\"\n    Create a AutoAugment transform\n    Args:\n        configs: A string that defines the automatic augmentation configuration.\n            It is composed of multiple parts separated by dashes (\"-\"). The first part defines\n            the AutoAugment policy ('autoaug', 'autoaugr' or '3a':\n            'autoaug' for the original AutoAugment policy with PosterizeOriginal,\n            'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation,\n             '3a' for the AutoAugment only with 3 augmentations.)\n            There is no order requirement for the remaining config parts.\n\n            - mstd: Float standard deviation of applied magnitude noise.\n\n            Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy\n            and magnitude_std 0.5.\n        hparams: Other hparams of the automatic augmentation scheme.\n    \"\"\"\n    config = configs.split(\"-\")\n    policy_name = config[0]\n    config = config[1:]\n    hparams.setdefault(\"magnitude_std\", 0.5)  # default magnitude_std is set to 0.5\n    for c in config:\n        cs = re.split(r\"(\\d.*)\", c)\n        if len(cs) < 2:\n            continue\n        key, val = cs[:2]\n        if key == \"mstd\":\n            # noise param injected via hparams for now\n            hparams.setdefault(\"magnitude_std\", float(val))\n        else:\n            assert False, \"Unknown AutoAugment config section\"\n    aa_policy = auto_augment_policy(policy_name, hparams=hparams)\n    return AutoAugment(aa_policy)\n
"},{"location":"reference/data/#mindcv.data.auto_augment.rand_augment_transform","title":"mindcv.data.auto_augment.rand_augment_transform(configs, hparams)","text":"

Create a RandAugment transform

PARAMETER DESCRIPTION configs

A string that defines the random augmentation configuration. It is composed of multiple parts separated by dashes (\"-\"). The first part defines the AutoAugment policy ('randaug' policy). There is no order requirement for the remaining config parts.

  • m: Integer magnitude of rand augment. Default: 10
  • n: Integer num layer (number of transform operations selected for each image). Default: 2
  • w: Integer probability weight index (the index that affects a group of weights selected by operations).
  • mstd: Float standard deviation of the applied magnitude noise; a value greater than 100 (or infinity) switches to uniform sampling.
  • mmax: Set the upper range limit for magnitude to a value other than the default value of _LEVEL_DENOM (10).
  • inc: Integer (bool), using the severity increase with magnitude (default: 0).

Example: 'randaug-w0-n3-mstd0.5' will be random augment using the weights 0, num_layers 3, magnitude_std 0.5.

hparams

Other hparams (kwargs) for the RandAugmentation scheme.

Source code in mindcv/data/auto_augment.py
def rand_augment_transform(configs, hparams):\n\"\"\"\n    Create a RandAugment transform\n    Args:\n        configs: A string that defines the random augmentation configuration.\n            It is composed of multiple parts separated by dashes (\"-\").\n            The first part defines the AutoAugment policy ('randaug' policy).\n            There is no order requirement for the remaining config parts.\n\n            - m: Integer magnitude of rand augment. Default: 10\n            - n: Integer num layer (number of transform operations selected for each image). Default: 2\n            - w: Integer probability weight index (the index that affects a group of weights selected by operations).\n            - mstd: Floating standard deviation of applied magnitude noise,\n                or uniform sampling at infinity (or greater than 100).\n            - mmax: Set the upper range limit for magnitude to a value\n                other than the default value of _LEVEL_DENOM (10).\n            - inc: Integer (bool), using the severity increase with magnitude (default: 0).\n\n            Example: 'randaug-w0-n3-mstd0.5' will be random augment\n                using the weights 0, num_layers 3, magnitude_std 0.5.\n        hparams: Other hparams (kwargs) for the RandAugmentation scheme.\n    \"\"\"\n    magnitude = _LEVEL_DENOM  # default to _LEVEL_DENOM for magnitude (currently 10)\n    num_layers = 2  # default to 2 ops per image\n    hparams.setdefault(\"magnitude_std\", 0.5)  # default magnitude_std is set to 0.5\n    weight_idx = None  # default to no probability weights for op choice\n    transforms = _RAND_TRANSFORMS\n    config = configs.split(\"-\")\n    assert config[0] == \"randaug\"\n    config = config[1:]\n    for c in config:\n        cs = re.split(r\"(\\d.*)\", c)\n        if len(cs) < 2:\n            continue\n        key, val = cs[:2]\n        if key == \"mstd\":\n            # noise param / randomization of magnitude values\n            mstd = float(val)\n            if mstd > 100:\n                # use uniform sampling in 0 to magnitude if mstd is > 100\n                mstd = float(\"inf\")\n            hparams.setdefault(\"magnitude_std\", mstd)\n        elif key == \"mmax\":\n            # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]\n            hparams.setdefault(\"magnitude_max\", int(val))\n        elif key == \"inc\":\n            if bool(val):\n                transforms = _RAND_INCREASING_TRANSFORMS\n        elif key == \"m\":\n            magnitude = int(val)\n        elif key == \"n\":\n            num_layers = int(val)\n        elif key == \"w\":\n            weight_idx = int(val)\n        else:\n            assert False, \"Unknown RandAugment config section\"\n    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)\n    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)\n    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)\n
"},{"location":"reference/data/#mindcv.data.auto_augment.trivial_augment_wide_transform","title":"mindcv.data.auto_augment.trivial_augment_wide_transform(configs, hparams)","text":"

Create a TrivialAugmentWide transform

PARAMETER DESCRIPTION configs

A string that defines the TrivialAugmentWide configuration. It is composed of multiple parts separated by dashes (\"-\"). The first part is the augmentation name and should be 'trivialaugwide'. The second part (optional) sets the maximum value of the magnitude.

  • m: the final magnitude of an operation is uniformly sampled from [0, m]. Default: 31

Example: 'trivialaugwide-m20' applies TrivialAugmentWide with the magnitude uniformly sampled from [0, 20].

hparams

Other hparams (kwargs) for the TrivialAugment scheme.

RETURNS DESCRIPTION

A Mindspore compatible Transform

Source code in mindcv/data/auto_augment.py
def trivial_augment_wide_transform(configs, hparams):\n\"\"\"\n    Create a TrivialAugmentWide transform\n    Args:\n        configs: A string that defines the TrivialAugmentWide configuration.\n            It is composed of multiple parts separated by dashes (\"-\").\n            The first part defines the AutoAugment name, it should be 'trivialaugwide'.\n            the second part(not necessary) the maximum value of magnitude.\n\n            - m: final magnitude of a operation will uniform sampling from [0, m] . Default: 31\n\n            Example: 'trivialaugwide-m20' will be TrivialAugmentWide\n            with mgnitude uniform sampling from [0, 20],\n        hparams: Other hparams (kwargs) for the TrivialAugment scheme.\n    Returns:\n        A Mindspore compatible Transform\n    \"\"\"\n    magnitude = 31\n    transforms = _TRIVIALAUGMENT_WIDE_TRANSFORMS\n    config = configs.split(\"-\")\n    assert config[0] == \"trivialaugwide\"\n    config = config[1:]\n    for c in config:\n        cs = re.split(r\"(\\d.*)\", c)\n        if len(cs) < 2:\n            continue\n        key, val = cs[:2]\n        if key == \"m\":\n            magnitude = int(val)\n        else:\n            assert False, \"Unknown TrivialAugmentWide config section\"\n    if not hparams:\n        hparams = dict()\n    hparams[\"magnitude_max\"] = magnitude\n    hparams[\"magnitude_std\"] = float(\"inf\")  # default to uniform sampling\n    hparams[\"trivialaugwide\"] = True\n    ta_ops = trivial_augment_wide_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)\n    return TrivialAugmentWide(ta_ops)\n
"},{"location":"reference/data/#mindcv.data.auto_augment.augment_and_mix_transform","title":"mindcv.data.auto_augment.augment_and_mix_transform(configs, hparams=None)","text":"

Create an AugMix transform

PARAMETER DESCRIPTION configs

String defining the configuration of AugMix augmentation. It consists of multiple sections separated by dashes ('-'). The first section defines the name of the augmentation and should be 'augmix'. The remaining sections, in no particular order, determine: 'm' - integer magnitude (severity) of the augmentation mix (default: 3); 'w' - integer width of the augmentation chain (default: 3); 'd' - integer depth of the augmentation chain (-1 means random in [1, 3], default: -1); 'a' - integer or float, the alpha parameter of the Beta distribution used to generate the mixing weights (default: 1.0). Ex: 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, and chain depth 2.

TYPE: str

hparams

Other hparams (kwargs) for the Augmentation transforms

DEFAULT: None

RETURNS DESCRIPTION

A Mindspore compatible Transform

Source code in mindcv/data/auto_augment.py
def augment_and_mix_transform(configs, hparams=None):\n\"\"\"Create AugMix PyTorch transform\n\n    Args:\n        configs (str): String defining configuration of AugMix augmentation. Consists of multiple sections separated\n            by dashes ('-'). The first section defines the specific name of augment, it should be 'augmix'.\n            The remaining sections, not order sepecific determine\n                'm' - integer magnitude (severity) of augmentation mix (default: 3)\n                'w' - integer width of augmentation chain (default: 3)\n                'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)\n                'a' - integer or float, the args of beta deviation of beta for generate the weight, default 1..\n            Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2\n\n        hparams: Other hparams (kwargs) for the Augmentation transforms\n\n    Returns:\n         A Mindspore compatible Transform\n    \"\"\"\n    magnitude = 3\n    width = 3\n    depth = -1\n    alpha = 1.0\n    config = configs.split(\"-\")\n    assert config[0] == \"augmix\"\n    config = config[1:]\n    for c in config:\n        cs = re.split(r\"(\\d.*)\", c)\n        if len(cs) < 2:\n            continue\n        key, val = cs[:2]\n        if key == \"m\":\n            magnitude = int(val)\n        elif key == \"w\":\n            width = int(val)\n        elif key == \"d\":\n            depth = int(val)\n        elif key == \"a\":\n            alpha = float(val)\n        else:\n            assert False, \"Unknown AugMix config section\"\n    if not hparams:\n        hparams = dict()\n    hparams[\"magnitude_std\"] = float(\"inf\")  # default to uniform sampling (if not set via mstd arg)\n    ops = augmix_ops(magnitude=magnitude, hparams=hparams)\n    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth)\n
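For quick reference, here is a minimal sketch that exercises the config-string parsing of the four factories documented above. It is illustrative only: the empty hparams dicts are an assumption, and in practice additional hparams (e.g., an image-mean fill color) may be supplied by the data pipeline that normally creates these transforms.

from mindcv.data.auto_augment import (\n    auto_augment_transform,\n    rand_augment_transform,\n    trivial_augment_wide_transform,\n    augment_and_mix_transform,\n)\n\n# Config strings follow the formats documented above; hparams are kept minimal here.\naa = auto_augment_transform(\"autoaug-mstd0.5\", {})\nra = rand_augment_transform(\"randaug-m7-mstd0.5\", {})\nta = trivial_augment_wide_transform(\"trivialaugwide-m20\", {})\nam = augment_and_mix_transform(\"augmix-m5-w4-d2\", {})\n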
"},{"location":"reference/data/#dataset-factory","title":"Dataset Factory","text":""},{"location":"reference/data/#mindcv.data.dataset_factory.create_dataset","title":"mindcv.data.dataset_factory.create_dataset(name='', root=None, split='train', shuffle=True, num_samples=None, num_shards=None, shard_id=None, num_parallel_workers=None, download=False, num_aug_repeats=0, **kwargs)","text":"

Creates dataset by name.

PARAMETER DESCRIPTION name

dataset name like MNIST, CIFAR10, ImageNet, or ''. '' means a customized dataset. Default: ''.

TYPE: str DEFAULT: ''

root

dataset root dir. Default: None.

TYPE: Optional[str] DEFAULT: None

split

data split: '' or split name string (train/val/test), if it is '', no split is used. Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.

TYPE: str DEFAULT: 'train'

shuffle

whether to shuffle the dataset. Default: True.

TYPE: bool DEFAULT: True

num_samples

Number of elements to sample (default=None, which means sample all elements).

TYPE: Optional[int] DEFAULT: None

num_shards

Number of shards that the dataset will be divided into (default=None). When this argument is specified, num_samples reflects the maximum sample number of per shard.

TYPE: Optional[int] DEFAULT: None

shard_id

The shard ID within num_shards (default=None). This argument can only be specified when num_shards is also specified.

TYPE: Optional[int] DEFAULT: None

num_parallel_workers

Number of workers to read the data (default=None, set in the config).

TYPE: Optional[int] DEFAULT: None

download

whether to download the dataset. Default: False

TYPE: bool DEFAULT: False

num_aug_repeats

Number of dataset repetition for repeated augmentation. If 0 or 1, repeated augmentation is disabled. Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)

TYPE: int DEFAULT: 0

Note

For custom datasets and imagenet, the dataset dir should follow the structure like: .dataset_name/ \u251c\u2500\u2500 split1/ \u2502 \u251c\u2500\u2500 class1/ \u2502 \u2502 \u251c\u2500\u2500 000001.jpg \u2502 \u2502 \u251c\u2500\u2500 000002.jpg \u2502 \u2502 \u2514\u2500\u2500 .... \u2502 \u2514\u2500\u2500 class2/ \u2502 \u251c\u2500\u2500 000001.jpg \u2502 \u251c\u2500\u2500 000002.jpg \u2502 \u2514\u2500\u2500 .... \u2514\u2500\u2500 split2/ \u251c\u2500\u2500 class1/ \u2502 \u251c\u2500\u2500 000001.jpg \u2502 \u251c\u2500\u2500 000002.jpg \u2502 \u2514\u2500\u2500 .... \u2514\u2500\u2500 class2/ \u251c\u2500\u2500 000001.jpg \u251c\u2500\u2500 000002.jpg \u2514\u2500\u2500 ....

RETURNS DESCRIPTION

Dataset object

Source code in mindcv/data/dataset_factory.py
def create_dataset(\n    name: str = \"\",\n    root: Optional[str] = None,\n    split: str = \"train\",\n    shuffle: bool = True,\n    num_samples: Optional[int] = None,\n    num_shards: Optional[int] = None,\n    shard_id: Optional[int] = None,\n    num_parallel_workers: Optional[int] = None,\n    download: bool = False,\n    num_aug_repeats: int = 0,\n    **kwargs,\n):\nr\"\"\"Creates dataset by name.\n\n    Args:\n        name: dataset name like MNIST, CIFAR10, ImageNeT, ''. '' means a customized dataset. Default: ''.\n        root: dataset root dir. Default: None.\n        split: data split: '' or split name string (train/val/test), if it is '', no split is used.\n            Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.\n        shuffle: whether to shuffle the dataset. Default: True.\n        num_samples: Number of elements to sample (default=None, which means sample all elements).\n        num_shards: Number of shards that the dataset will be divided into (default=None).\n            When this argument is specified, `num_samples` reflects the maximum sample number of per shard.\n        shard_id: The shard ID within `num_shards` (default=None).\n            This argument can only be specified when `num_shards` is also specified.\n        num_parallel_workers: Number of workers to read the data (default=None, set in the config).\n        download: whether to download the dataset. Default: False\n        num_aug_repeats: Number of dataset repetition for repeated augmentation.\n            If 0 or 1, repeated augmentation is disabled.\n            Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)\n\n    Note:\n        For custom datasets and imagenet, the dataset dir should follow the structure like:\n        .dataset_name/\n        \u251c\u2500\u2500 split1/\n        \u2502  \u251c\u2500\u2500 class1/\n        \u2502  \u2502   \u251c\u2500\u2500 000001.jpg\n        \u2502  \u2502   \u251c\u2500\u2500 000002.jpg\n        \u2502  \u2502   \u2514\u2500\u2500 ....\n        \u2502  \u2514\u2500\u2500 class2/\n        \u2502      \u251c\u2500\u2500 000001.jpg\n        \u2502      \u251c\u2500\u2500 000002.jpg\n        \u2502      \u2514\u2500\u2500 ....\n        \u2514\u2500\u2500 split2/\n           \u251c\u2500\u2500 class1/\n           \u2502   \u251c\u2500\u2500 000001.jpg\n           \u2502   \u251c\u2500\u2500 000002.jpg\n           \u2502   \u2514\u2500\u2500 ....\n           \u2514\u2500\u2500 class2/\n               \u251c\u2500\u2500 000001.jpg\n               \u251c\u2500\u2500 000002.jpg\n               \u2514\u2500\u2500 ....\n\n    Returns:\n        Dataset object\n    \"\"\"\n    name = name.lower()\n    if root is None:\n        root = os.path.join(get_dataset_download_root(), name)\n\n    assert (num_samples is None) or (num_aug_repeats == 0), \"num_samples and num_aug_repeats can NOT be set together.\"\n\n    # subset sampling\n    if num_samples is not None and num_samples > 0:\n        # TODO: rewrite ordered distributed sampler (subset sampling in distributed mode is not tested)\n        if num_shards is not None and num_shards > 1:  # distributed\n            _logger.info(f\"number of shards: {num_shards}, number of samples: {num_samples}\")\n            sampler = DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)\n        else:  # standalone\n            if shuffle:\n                sampler = ds.RandomSampler(replacement=False, num_samples=num_samples)\n           
 else:\n                sampler = ds.SequentialSampler(num_samples=num_samples)\n        mindspore_kwargs = dict(\n            shuffle=None,\n            sampler=sampler,\n            num_parallel_workers=num_parallel_workers,\n            **kwargs,\n        )\n    else:\n        sampler = None\n        mindspore_kwargs = dict(\n            shuffle=shuffle,\n            sampler=sampler,\n            num_shards=num_shards,\n            shard_id=shard_id,\n            num_parallel_workers=num_parallel_workers,\n            **kwargs,\n        )\n\n    # sampler for repeated augmentation\n    if num_aug_repeats > 0:\n        dataset_size = get_dataset_size(name, root, split)\n        _logger.info(\n            f\"Repeated augmentation is enabled, num_aug_repeats: {num_aug_repeats}, \"\n            f\"original dataset size: {dataset_size}.\"\n        )\n        # since drop_remainder is usually True, we don't need to do rounding in sampling\n        sampler = RepeatAugSampler(\n            dataset_size,\n            num_shards=num_shards,\n            rank_id=shard_id,\n            num_repeats=num_aug_repeats,\n            selected_round=0,\n            shuffle=shuffle,\n        )\n        mindspore_kwargs = dict(shuffle=None, sampler=sampler, num_shards=None, shard_id=None, **kwargs)\n\n    # create dataset\n    if name in _MINDSPORE_BASIC_DATASET:\n        dataset_class = _MINDSPORE_BASIC_DATASET[name][0]\n        dataset_download = _MINDSPORE_BASIC_DATASET[name][1]\n        dataset_new_path = None\n        if download:\n            if shard_id is not None:\n                root = os.path.join(root, f\"dataset_{str(shard_id)}\")\n            dataset_download = dataset_download(root)\n            dataset_download.download()\n            dataset_new_path = dataset_download.path\n\n        dataset = dataset_class(\n            dataset_dir=dataset_new_path if dataset_new_path else root,\n            usage=split,\n            **mindspore_kwargs,\n        )\n        # address ms dataset num_classes empty issue\n        if name == \"mnist\":\n            dataset.num_classes = lambda: 10\n        elif name == \"cifar10\":\n            dataset.num_classes = lambda: 10\n        elif name == \"cifar100\":\n            dataset.num_classes = lambda: 100\n\n    else:\n        if name == \"imagenet\" and download:\n            raise ValueError(\n                \"Imagenet dataset download is not supported. \"\n                \"Please download imagenet from https://www.image-net.org/download.php, \"\n                \"and parse the path of dateset directory via args.data_dir.\"\n            )\n\n        if os.path.isdir(root):\n            root = os.path.join(root, split)\n        dataset = ImageFolderDataset(dataset_dir=root, **mindspore_kwargs)\n\"\"\" Another implementation which a bit slower than ImageFolderDataset\n            imagenet_dataset = ImageNetDataset(dataset_dir=root)\n            sampler = RepeatAugSampler(len(imagenet_dataset), num_shards=num_shards, rank_id=shard_id,\n                                       num_repeats=repeated_aug, selected_round=1, shuffle=shuffle)\n            dataset = ds.GeneratorDataset(imagenet_dataset, column_names=imagenet_dataset.column_names, sampler=sampler)\n        \"\"\"\n    return dataset\n
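A minimal usage sketch is shown below; the dataset name, root path, and download flag are placeholders to adapt to your environment.

from mindcv.data.dataset_factory import create_dataset\n\n# Build the training split of CIFAR-10, downloading it if it is not present.\ndataset = create_dataset(\n    name=\"cifar10\",\n    root=\"./data/cifar10\",\n    split=\"train\",\n    shuffle=True,\n    download=True,\n)\nprint(dataset.get_dataset_size())\n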
"},{"location":"reference/data/#sampler","title":"Sampler","text":""},{"location":"reference/data/#mindcv.data.distributed_sampler.RepeatAugSampler","title":"mindcv.data.distributed_sampler.RepeatAugSampler","text":"

Sampler that restricts data loading to a subset of the dataset for distributed training, with repeated augmentation. It ensures that each augmented version of a sample is visible to a different process.

This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py

PARAMETER DESCRIPTION dataset_size

dataset size.

num_shards

num devices.

DEFAULT: None

rank_id

device id.

DEFAULT: None

shuffle(bool)

True for using shuffle, False for not using.

num_repeats(int)

number of repeated instances in repeated augmentation. Default: 3.

selected_round(int)

round the total number of samples down to a multiple of this factor. Default: 256.

Source code in mindcv/data/distributed_sampler.py
class RepeatAugSampler:\n\"\"\"Sampler that restricts data loading to a subset of the dataset for distributed,\n    with repeated augmentation.\n    It ensures that different each augmented version of a sample will be visible to a\n    different process.\n\n    This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py\n\n    Args:\n        dataset_size: dataset size.\n        num_shards: num devices.\n        rank_id: device id.\n        shuffle(bool): True for using shuffle, False for not using.\n        num_repeats(int): num of repeated instances in repeated augmentation, Default:3.\n        selected_round(int): round the total num of samples by this factor, Defailt:256.\n    \"\"\"\n\n    def __init__(\n        self,\n        dataset_size,\n        num_shards=None,\n        rank_id=None,\n        shuffle=True,\n        num_repeats=3,\n        selected_round=256,\n    ):\n        if num_shards is None:\n            _logger.warning(\"num_shards is set to 1 in RepeatAugSampler since it is not passed in\")\n            num_shards = 1\n        if rank_id is None:\n            rank_id = 0\n\n        # assert isinstance(num_repeats, int), f'num_repeats should be Type integer, but got {type(num_repeats)}'\n\n        self.dataset_size = dataset_size\n        self.num_shards = num_shards\n        self.rank_id = rank_id\n        self.shuffle = shuffle\n        self.num_repeats = int(num_repeats)\n        self.epoch = 0\n        self.num_samples = int(math.ceil(self.dataset_size * num_repeats / self.num_shards))\n        self.total_size = self.num_samples * self.num_shards\n        # Determine the number of samples to select per epoch for each rank.\n        if selected_round:\n            self.num_selected_samples = int(\n                math.floor(self.dataset_size // selected_round * selected_round / num_shards)\n            )\n        else:\n            self.num_selected_samples = int(math.ceil(self.dataset_size / num_shards))\n\n    def __iter__(self):\n        # deterministically shuffle based on epoch\n        # print('__iter__  generating new shuffled indices: ', self.epoch)\n        if self.shuffle:\n            indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size)\n            indices = indices.tolist()\n            self.epoch += 1\n            # print(indices[:30])\n        else:\n            indices = list(range(self.dataset_size))\n        # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]\n        indices = [ele for ele in indices for i in range(self.num_repeats)]\n\n        # add extra samples to make it evenly divisible\n        padding_size = self.total_size - len(indices)\n        if padding_size > 0:\n            indices += indices[:padding_size]\n        assert len(indices) == self.total_size\n\n        # subsample per rank\n        indices = indices[self.rank_id : self.total_size : self.num_shards]\n        assert len(indices) == self.num_samples\n\n        # return up to num selected samples\n        return iter(indices[: self.num_selected_samples])\n\n    def __len__(self):\n        return self.num_selected_samples\n\n    def set_epoch(self, epoch):\n        self.epoch = epoch\n
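A small sketch of constructing the sampler directly (the sizes below are placeholders); during training it is normally created for you by create_dataset when num_aug_repeats > 0.

from mindcv.data.distributed_sampler import RepeatAugSampler\n\n# 8 shards, rank 0, each sample repeated 3 times per epoch.\nsampler = RepeatAugSampler(\n    dataset_size=50000,\n    num_shards=8,\n    rank_id=0,\n    shuffle=True,\n    num_repeats=3,\n)\nprint(len(sampler))  # samples actually yielded by this rank per epoch\nindices = list(sampler)[:8]\nprint(indices)\n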
"},{"location":"reference/data/#dataloader","title":"DataLoader","text":""},{"location":"reference/data/#mindcv.data.loader.create_loader","title":"mindcv.data.loader.create_loader(dataset, batch_size, drop_remainder=False, is_training=False, mixup=0.0, cutmix=0.0, cutmix_prob=0.0, num_classes=1000, transform=None, target_transform=None, num_parallel_workers=None, python_multiprocessing=False, separate=False)","text":"

Creates dataloader.

Applies operations such as transform and batch to the ms.dataset.Dataset object created by the create_dataset function to get the dataloader.

PARAMETER DESCRIPTION dataset

dataset object created by create_dataset.

TYPE: ms.dataset.Dataset

batch_size

The number of rows each batch is created with. An int or callable object which takes exactly 1 parameter, BatchInfo.

TYPE: int or function

drop_remainder

Determines whether to drop the last block whose data row number is less than batch size (default=False). If True, and if there are less than batch_size rows available to make the last batch, then those rows will be dropped and not propagated to the child node.

TYPE: bool DEFAULT: False

is_training

whether it is in train mode. Default: False.

TYPE: bool DEFAULT: False

mixup

mixup alpha, mixup will be enabled if > 0. (default=0.0).

TYPE: float DEFAULT: 0.0

cutmix

cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.

TYPE: float DEFAULT: 0.0

cutmix_prob

prob of doing cutmix for an image (default=0.0)

TYPE: float DEFAULT: 0.0

num_classes

the number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

transform

the list of transformations that will be applied to the image, which is obtained by create_transforms. If None, the default ImageNet transformation for evaluation will be applied. Default: None.

TYPE: list or None DEFAULT: None

target_transform

the list of transformations that will be applied on the label. If None, the label will be converted to the type of ms.int32. Default: None.

TYPE: list or None DEFAULT: None

num_parallel_workers

Number of workers(threads) to process the dataset in parallel (default=None).

TYPE: int DEFAULT: None

python_multiprocessing

Parallelize Python operations with multiple worker processes. This option could be beneficial if the Python operation is computationally heavy (default=False).

TYPE: bool DEFAULT: False

separate(bool,

separate the original image from its transformed versions (used for augmentation splits)

TYPE: optional

Note
  1. cutmix is now experimental (which means a performance gain is not guaranteed) and cannot be used together with mixup due to the label int type conflict.
  2. is_training, mixup, and num_classes are used for MixUp, which is a kind of transform operation. However, we are not able to merge it into transform, due to the limitations of the mindspore.dataset API.
RETURNS DESCRIPTION

BatchDataset, dataset batched.

Source code in mindcv/data/loader.py
def create_loader(\n    dataset,\n    batch_size,\n    drop_remainder=False,\n    is_training=False,\n    mixup=0.0,\n    cutmix=0.0,\n    cutmix_prob=0.0,\n    num_classes=1000,\n    transform=None,\n    target_transform=None,\n    num_parallel_workers=None,\n    python_multiprocessing=False,\n    separate=False,\n):\nr\"\"\"Creates dataloader.\n\n    Applies operations such as transform and batch to the `ms.dataset.Dataset` object\n    created by the `create_dataset` function to get the dataloader.\n\n    Args:\n        dataset (ms.dataset.Dataset): dataset object created by `create_dataset`.\n        batch_size (int or function): The number of rows each batch is created with. An\n            int or callable object which takes exactly 1 parameter, BatchInfo.\n        drop_remainder (bool, optional): Determines whether to drop the last block\n            whose data row number is less than batch size (default=False). If True, and if there are less\n            than batch_size rows available to make the last batch, then those rows will\n            be dropped and not propagated to the child node.\n        is_training (bool): whether it is in train mode. Default: False.\n        mixup (float): mixup alpha, mixup will be enabled if > 0. (default=0.0).\n        cutmix (float): cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.\n        cutmix_prob (float): prob of doing cutmix for an image (default=0.0)\n        num_classes (int): the number of classes. Default: 1000.\n        transform (list or None): the list of transformations that wil be applied on the image,\n            which is obtained by `create_transform`. If None, the default imagenet transformation\n            for evaluation will be applied. Default: None.\n        target_transform (list or None): the list of transformations that will be applied on the label.\n            If None, the label will be converted to the type of ms.int32. Default: None.\n        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel\n            (default=None).\n        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This\n            option could be beneficial if the Python operation is computational heavy (default=False).\n        separate(bool, optional): separate the image origin and the image been transformed\n\n    Note:\n        1. cutmix is now experimental (which means performance gain is not guarantee)\n            and can not be used together with mixup due to the label int type conflict.\n        2. `is_training`, `mixup`, `num_classes` is used for MixUp, which is a kind of transform operation.\n          However, we are not able to merge it into `transform`, due to the limitations of the `mindspore.dataset` API.\n\n\n    Returns:\n        BatchDataset, dataset batched.\n    \"\"\"\n\n    if target_transform is None:\n        target_transform = transforms.TypeCast(ms.int32)\n    target_input_columns = \"label\" if \"label\" in dataset.get_col_names() else \"fine_label\"\n    dataset = dataset.map(\n        operations=target_transform,\n        input_columns=target_input_columns,\n        num_parallel_workers=num_parallel_workers,\n        python_multiprocessing=python_multiprocessing,\n    )\n\n    if transform is None:\n        warnings.warn(\n            \"Using None as the default value of transform will set it back to \"\n            \"traditional image transform, which is not recommended. 
\"\n            \"You should explicitly call `create_transforms` and pass it to `create_loader`.\"\n        )\n        transform = create_transforms(\"imagenet\", is_training=False)\n\n    # only apply augment splits to train dataset\n    if separate and is_training:\n        assert isinstance(transform, tuple) and len(transform) == 3\n\n        # Note: mindspore-2.0 delete the parameter column_order\n        sig = inspect.signature(dataset.map)\n        pass_column_order = False if \"kwargs\" in sig.parameters else True\n\n        # map all the transform\n        dataset = map_transform_splits(\n            dataset, transform, num_parallel_workers, python_multiprocessing, pass_column_order\n        )\n        # after batch, datasets has 4 columns\n        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)\n        # concat the 3 columns of image\n        dataset = dataset.map(\n            operations=concat_per_batch_map,\n            input_columns=[\"image_clean\", \"image_aug1\", \"image_aug2\", \"label\"],\n            output_columns=[\"image\", \"label\"],\n            column_order=[\"image\", \"label\"] if pass_column_order else None,\n            num_parallel_workers=num_parallel_workers,\n            python_multiprocessing=python_multiprocessing,\n        )\n\n    else:\n        dataset = dataset.map(\n            operations=transform,\n            input_columns=\"image\",\n            num_parallel_workers=num_parallel_workers,\n            python_multiprocessing=python_multiprocessing,\n        )\n\n        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)\n\n    if is_training:\n        if (mixup + cutmix > 0.0) and batch_size > 1:\n            # TODO: use mindspore vision cutmix and mixup after the confliction fixed in later release\n            # set label_smoothing 0 here since label smoothing is computed in loss module\n            mixup_fn = Mixup(\n                mixup_alpha=mixup,\n                cutmix_alpha=cutmix,\n                cutmix_minmax=None,\n                prob=cutmix_prob,\n                switch_prob=0.5,\n                label_smoothing=0.0,\n                num_classes=num_classes,\n            )\n            # images in a batch are mixed. labels are converted soft onehot labels.\n            dataset = dataset.map(\n                operations=mixup_fn,\n                input_columns=[\"image\", target_input_columns],\n                num_parallel_workers=num_parallel_workers,\n            )\n\n    return dataset\n
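A minimal usage sketch: build a dataset, create the matching transform list explicitly, and pass both to create_loader. The create_dataset arguments below are illustrative assumptions; see its own reference entry for the exact signature.

from mindcv.data import create_dataset, create_transforms, create_loader

# the create_dataset arguments here are illustrative, not the authoritative signature
dataset = create_dataset(name="cifar10", root="./data", split="train", download=True)
# build the transform list explicitly instead of relying on the transform=None fallback
transform = create_transforms("cifar10", image_resize=224, is_training=True)
loader = create_loader(
    dataset,
    batch_size=32,
    is_training=True,
    num_classes=10,
    transform=transform,
    num_parallel_workers=4,
)
for images, labels in loader.create_tuple_iterator():
    print(images.shape, labels.shape)  # e.g. (32, 3, 224, 224) (32,)
    break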
"},{"location":"reference/data/#mixup","title":"MixUp","text":""},{"location":"reference/data/#mindcv.data.mixup.Mixup","title":"mindcv.data.mixup.Mixup","text":"

Mixup/Cutmix that applies different params to each element or whole batch

PARAMETER DESCRIPTION mixup_alpha

mixup alpha value, mixup is active if > 0.

TYPE: float DEFAULT: 1.0

cutmix_alpha

cutmix alpha value, cutmix is active if > 0.

TYPE: float DEFAULT: 0.0

cutmix_minmax

cutmix min/max image ratio; if not None, cutmix is active and uses this instead of alpha.

TYPE: List[float] DEFAULT: None

prob

probability of applying mixup or cutmix per batch or element

TYPE: float DEFAULT: 1.0

switch_prob

probability of switching to cutmix instead of mixup when both are active

TYPE: float DEFAULT: 0.5

mode

how to apply mixup/cutmix params: per 'batch', 'pair' (pair of elements), or 'elem' (element).

TYPE: str DEFAULT: 'batch'

correct_lam

apply lambda correction when cutmix bbox clipped by image borders

TYPE: bool DEFAULT: True

label_smoothing

apply label smoothing to the mixed target tensor

TYPE: float DEFAULT: 0.1

num_classes

number of classes for target

TYPE: int DEFAULT: 1000

Source code in mindcv/data/mixup.py
class Mixup:\n\"\"\"Mixup/Cutmix that applies different params to each element or whole batch\n\n    Args:\n        mixup_alpha (float): mixup alpha value, mixup is active if > 0.\n        cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0.\n        cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.\n        prob (float): probability of applying mixup or cutmix per batch or element\n        switch_prob (float): probability of switching to cutmix instead of mixup when both are active\n        mode (str): how to apply mixup/cutmix params (per 'batch', 'pair' (pair of elements), 'elem' (element)\n        correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders\n        label_smoothing (float): apply label smoothing to the mixed target tensor\n        num_classes (int): number of classes for target\n    \"\"\"\n\n    def __init__(\n        self,\n        mixup_alpha=1.0,\n        cutmix_alpha=0.0,\n        cutmix_minmax=None,\n        prob=1.0,\n        switch_prob=0.5,\n        mode=\"batch\",\n        correct_lam=True,\n        label_smoothing=0.1,\n        num_classes=1000,\n    ):\n        self.mixup_alpha = mixup_alpha\n        self.cutmix_alpha = cutmix_alpha\n        self.cutmix_minmax = cutmix_minmax\n        if self.cutmix_minmax is not None:\n            assert len(self.cutmix_minmax) == 2\n            # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe\n            self.cutmix_alpha = 1.0\n        self.mix_prob = prob\n        self.switch_prob = switch_prob\n        self.label_smoothing = label_smoothing\n        self.num_classes = num_classes\n        self.mode = mode\n        self.correct_lam = correct_lam  # correct lambda based on clipped area for cutmix\n        self.mixup_enabled = True  # set false to disable mixing (intended tp be set by train loop)\n\n    def _params_per_elem(self, batch_size):\n\"\"\"_params_per_elem\"\"\"\n        lam = np.ones(batch_size, dtype=np.float32)\n        use_cutmix = np.zeros(batch_size, dtype=np.bool)\n        if self.mixup_enabled:\n            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:\n                use_cutmix = np.random.rand(batch_size) < self.switch_prob\n                lam_mix = np.where(\n                    use_cutmix,\n                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size),\n                    np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size),\n                )\n            elif self.mixup_alpha > 0.0:\n                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)\n            elif self.cutmix_alpha > 0.0:\n                use_cutmix = np.ones(batch_size, dtype=np.bool)\n                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size)\n            else:\n                assert False, \"One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true.\"\n            lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam)\n        return lam, use_cutmix\n\n    def _params_per_batch(self):\n\"\"\"_params_per_batch\"\"\"\n        lam = 1.0\n        use_cutmix = False\n        if self.mixup_enabled and np.random.rand() < self.mix_prob:\n            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:\n                use_cutmix = np.random.rand() < self.switch_prob\n                lam_mix = (\n                    
np.random.beta(self.cutmix_alpha, self.cutmix_alpha)\n                    if use_cutmix\n                    else np.random.beta(self.mixup_alpha, self.mixup_alpha)\n                )\n            elif self.mixup_alpha > 0.0:\n                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)\n            elif self.cutmix_alpha > 0.0:\n                use_cutmix = True\n                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)\n            else:\n                assert False, \"One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true.\"\n            lam = float(lam_mix)\n        return lam, use_cutmix\n\n    def _mix_elem(self, x):\n\"\"\"_mix_elem\"\"\"\n        batch_size = len(x)\n        lam_batch, use_cutmix = self._params_per_elem(batch_size)\n        x_orig = x.clone()  # need to keep an unmodified original for mixing source\n        for i in range(batch_size):\n            j = batch_size - i - 1\n            lam = lam_batch[i]\n            if lam != 1.0:\n                if use_cutmix[i]:\n                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(\n                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam\n                    )\n                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]\n                    lam_batch[i] = lam\n                else:\n                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)\n        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)\n\n    def _mix_pair(self, x):\n\"\"\"_mix_pair\"\"\"\n        batch_size = len(x)\n        lam_batch, use_cutmix = self._params_per_elem(batch_size // 2)\n        x_orig = x.clone()  # need to keep an unmodified original for mixing source\n        for i in range(batch_size // 2):\n            j = batch_size - i - 1\n            lam = lam_batch[i]\n            if lam != 1.0:\n                if use_cutmix[i]:\n                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(\n                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam\n                    )\n                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]\n                    x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh]\n                    lam_batch[i] = lam\n                else:\n                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)\n                    x[j] = x[j] * lam + x_orig[i] * (1 - lam)\n        lam_batch = np.concatenate((lam_batch, lam_batch[::-1]))\n        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)\n\n    def _mix_batch(self, x):\n\"\"\"_mix_batch\"\"\"\n        lam, use_cutmix = self._params_per_batch()\n        if lam == 1.0:\n            return 1.0\n        if use_cutmix:\n            (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(\n                x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam\n            )\n            x[:, :, yl:yh, xl:xh] = np.flip(x, axis=0)[:, :, yl:yh, xl:xh]\n        else:\n            x_flipped = np.flip(x, axis=0) * (1.0 - lam)\n            x *= lam\n            x += x_flipped\n        return lam\n\n    def __call__(self, x, target):\n\"\"\"Mixup apply\"\"\"\n        # the same to image, label\n        assert len(x) % 2 == 0, \"Batch size should be even when using this\"\n        if self.mode == \"elem\":\n            lam = self._mix_elem(x)\n        elif self.mode == \"pair\":\n            lam = self._mix_pair(x)\n        else:\n            lam = 
self._mix_batch(x)\n        target = mixup_target(target, self.num_classes, lam, self.label_smoothing)\n        return x.astype(np.float32), target.astype(np.float32)\n
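A minimal sketch of applying Mixup directly to a batch, assuming the default 'batch' mode and NumPy inputs as used inside the create_loader pipeline above; shapes, alpha values, and the exact return types of the underlying mixup_target helper are illustrative assumptions.

import numpy as np
from mindcv.data.mixup import Mixup

mixup_fn = Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0, switch_prob=0.5,
                 label_smoothing=0.0, num_classes=10)
images = np.random.rand(4, 3, 224, 224).astype(np.float32)  # batch size must be even
labels = np.array([0, 1, 2, 3])
# images in the batch are mixed in place; labels become soft targets of length num_classes
mixed_images, soft_targets = mixup_fn(images, labels)

In practice the instance is registered as a per-batch map operation after dataset.batch(), as shown in the create_loader source above.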
"},{"location":"reference/data/#transform-factory","title":"Transform Factory","text":""},{"location":"reference/data/#mindcv.data.transforms_factory.create_transforms","title":"mindcv.data.transforms_factory.create_transforms(dataset_name='', image_resize=224, is_training=False, auto_augment=None, separate=False, **kwargs)","text":"

Creates a list of transform operation on image data.

PARAMETER DESCRIPTION dataset_name

if '', a customized dataset is assumed, and the same transform pipeline as ImageNet is currently applied. If a standard dataset name is given, including imagenet, cifar10, or mnist, preset transforms will be returned. Default: ''.

TYPE: str DEFAULT: ''

image_resize

the image size after resize for adapting to network. Default: 224.

TYPE: int DEFAULT: 224

is_training

if True, augmentation will be applied if supported. Default: False.

TYPE: bool DEFAULT: False

separate

whether to separate the original image from the transformed image.

DEFAULT: False

**kwargs

additional args passed to transforms_imagenet_train and transforms_imagenet_eval

DEFAULT: {}

RETURNS DESCRIPTION

A list of transformation operations

Source code in mindcv/data/transforms_factory.py
def create_transforms(\n    dataset_name=\"\",\n    image_resize=224,\n    is_training=False,\n    auto_augment=None,\n    separate=False,\n    **kwargs,\n):\nr\"\"\"Creates a list of transform operation on image data.\n\n    Args:\n        dataset_name (str): if '', customized dataset. Currently, apply the same transform pipeline as ImageNet.\n            if standard dataset name is given including imagenet, cifar10, mnist, preset transforms will be returned.\n            Default: ''.\n        image_resize (int): the image size after resize for adapting to network. Default: 224.\n        is_training (bool): if True, augmentation will be applied if support. Default: False.\n        auto_augment(str)\uff1aaugmentation strategies, such as \"augmix\", \"autoaug\" etc.\n        separate: separate the image origin and the image been transformed.\n        **kwargs: additional args parsed to `transforms_imagenet_train` and `transforms_imagenet_eval`\n\n    Returns:\n        A list of transformation operations\n    \"\"\"\n\n    dataset_name = dataset_name.lower()\n\n    if dataset_name in (\"imagenet\", \"\"):\n        trans_args = dict(image_resize=image_resize, **kwargs)\n        if is_training:\n            return transforms_imagenet_train(auto_augment=auto_augment, separate=separate, **trans_args)\n\n        return transforms_imagenet_eval(**trans_args)\n    elif dataset_name in (\"cifar10\", \"cifar100\"):\n        trans_list = transforms_cifar(resize=image_resize, is_training=is_training)\n        return trans_list\n    elif dataset_name == \"mnist\":\n        trans_list = transforms_mnist(resize=image_resize)\n        return trans_list\n    else:\n        raise NotImplementedError(\n            f\"Only supports creating transforms for ['imagenet'] datasets, but got {dataset_name}.\"\n        )\n
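A short example of building separate training and evaluation pipelines for an ImageNet-style dataset; the auto_augment value is one of the strategies mentioned above, and the settings are illustrative.

from mindcv.data import create_transforms

train_transforms = create_transforms("imagenet", image_resize=224, is_training=True, auto_augment="autoaug")
eval_transforms = create_transforms("imagenet", image_resize=224, is_training=False)
# both are lists of transform operations, ready to be passed to create_loader(transform=...)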
"},{"location":"reference/loss/","title":"Loss","text":""},{"location":"reference/loss/#loss-factory","title":"Loss Factory","text":""},{"location":"reference/loss/#mindcv.loss.loss_factory.create_loss","title":"mindcv.loss.loss_factory.create_loss(name='CE', weight=None, reduction='mean', label_smoothing=0.0, aux_factor=0.0)","text":"

Creates loss function

PARAMETER DESCRIPTION name

loss name: 'CE' for cross entropy, 'BCE' for binary cross entropy. Default: 'CE'.

TYPE: str DEFAULT: 'CE'

weight

Class weight. A rescaling weight given to the loss of each batch element. If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.

TYPE: Tensor DEFAULT: None

reduction

Apply a specific reduction method to the output: 'mean' or 'sum'. 'mean': the sum of the output will be divided by the number of elements in the output. 'sum': the output will be summed. Default: 'mean'.

TYPE: str DEFAULT: 'mean'

label_smoothing

Label smoothing factor, a regularization tool used to prevent the model from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

TYPE: float DEFAULT: 0.0

aux_factor

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs (i.e., deep supervision), like inception_v3. Default: 0.0.

TYPE: float DEFAULT: 0.0

Inputs
  • logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples, C means the number of classes. A tuple of two input logits is supported, in the order (main_logits, aux_logits), for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
  • labels (Tensor): Ground truth labels. Shape: [N] or [N, C]. (1) If in shape [N], sparse labels representing the class indices. Must be int type. (2) shape [N, C], dense labels representing the ground truth class probability values, or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
RETURNS DESCRIPTION

Loss function to compute the loss between the input logits and labels.

Source code in mindcv/loss/loss_factory.py
def create_loss(\n    name: str = \"CE\",\n    weight: Optional[Tensor] = None,\n    reduction: str = \"mean\",\n    label_smoothing: float = 0.0,\n    aux_factor: float = 0.0,\n):\nr\"\"\"Creates loss function\n\n    Args:\n        name (str):  loss name : 'CE' for cross_entropy. 'BCE': binary cross entropy. Default: 'CE'.\n        weight (Tensor): Class weight. A rescaling weight given to the loss of each batch element.\n            If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.\n        reduction: Apply specific reduction method to the output: 'mean' or 'sum'.\n            By default, the sum of the output will be divided by the number of elements in the output.\n            'sum': the output will be summed. Default:'mean'.\n        label_smoothing: Label smoothing factor, a regularization tool used to prevent the model\n            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.\n        aux_factor (float): Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs\n            (i.e., deep supervision), like inception_v3. Default: 0.0.\n\n    Inputs:\n        - logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples,\n            C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits)\n            for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.\n        - labels (Tensor): Ground truth labels. Shape: [N] or [N, C].\n            (1) If in shape [N], sparse labels representing the class indices. Must be int type.\n            (2) shape [N, C], dense labels representing the ground truth class probability values,\n            or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].\n\n    Returns:\n       Loss function to compute the loss between the input logits and labels.\n    \"\"\"\n    name = name.lower()\n\n    if name == \"ce\":\n        loss = CrossEntropySmooth(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)\n    elif name == \"bce\":\n        loss = BinaryCrossEntropySmooth(\n            smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight, pos_weight=None\n        )\n    elif name == \"asl_single_label\":\n        loss = AsymmetricLossSingleLabel(smoothing=label_smoothing)\n    elif name == \"asl_multi_label\":\n        loss = AsymmetricLossMultilabel()\n    elif name == \"jsd\":\n        loss = JSDCrossEntropy(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)\n    else:\n        raise NotImplementedError\n\n    return loss\n
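A minimal sketch of creating a smoothed cross-entropy loss and applying it to random logits; shapes and values are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.loss import create_loss

loss_fn = create_loss(name="CE", label_smoothing=0.1, aux_factor=0.0)
logits = Tensor(np.random.randn(8, 1000), ms.float32)        # [N, C]
labels = Tensor(np.random.randint(0, 1000, (8,)), ms.int32)  # sparse class indices, [N]
loss = loss_fn(logits, labels)                               # scalar Tensor with 'mean' reduction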
"},{"location":"reference/loss/#cross-entropy","title":"Cross Entropy","text":""},{"location":"reference/loss/#mindcv.loss.cross_entropy_smooth.CrossEntropySmooth","title":"mindcv.loss.cross_entropy_smooth.CrossEntropySmooth","text":"

Bases: nn.LossBase

Cross entropy loss with label smoothing. Applies the softmax activation function to the input logits, and uses the given logits to compute the cross entropy between the logits and the label.

PARAMETER DESCRIPTION smoothing

Label smoothing factor, a regularization tool used to prevent the model from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

DEFAULT: 0.0

aux_factor

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs (i.e., deep supervision), like inception_v3. Default: 0.0.

DEFAULT: 0.0

reduction

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

DEFAULT: 'mean'

weight

Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element. Data type must be float16 or float32.

TYPE: Tensor DEFAULT: None

Inputs

  • logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes. A tuple composed of multiple logits is supported, in the order (main_logits, aux_logits), for auxiliary loss used in networks like inception_v3.
  • labels (Tensor): Ground truth label. Shape: [N] or [N, C]. (1) Shape [N], sparse labels representing the class indices. Must be int type. (2) Shape [N, C], dense labels representing the ground truth class probability values, or the one-hot labels. Must be float type.

Source code in mindcv/loss/cross_entropy_smooth.py
class CrossEntropySmooth(nn.LossBase):\n\"\"\"\n    Cross entropy loss with label smoothing.\n    Apply softmax activation function to input `logits`, and uses the given logits to compute cross entropy\n    between the logits and the label.\n\n    Args:\n        smoothing: Label smoothing factor, a regularization tool used to prevent the model\n            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.\n        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs\n            (i.e., deep supervision), like inception_v3.  Default: 0.0.\n        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.\n        weight (Tensor): Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element.\n            Data type must be float16 or float32.\n\n    Inputs:\n        logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes.\n            Tuple composed of multiple logits are supported in order (main_logits, aux_logits)\n            for auxiliary loss used in networks like inception_v3.\n        labels (Tensor): Ground truth label. Shape: [N] or [N, C].\n            (1) Shape (N), sparse labels representing the class indices. Must be int type.\n            (2) Shape [N, C], dense labels representing the ground truth class probability values,\n            or the one-hot labels. Must be float type.\n    \"\"\"\n\n    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction=\"mean\", weight=None):\n        super().__init__()\n        self.smoothing = smoothing\n        self.aux_factor = aux_factor\n        self.reduction = reduction\n        self.weight = weight\n\n    def construct(self, logits, labels):\n        loss_aux = 0\n\n        if isinstance(logits, tuple):\n            main_logits = logits[0]\n            for aux in logits[1:]:\n                if self.aux_factor > 0:\n                    loss_aux += F.cross_entropy(\n                        aux, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing\n                    )\n        else:\n            main_logits = logits\n\n        loss_logits = F.cross_entropy(\n            main_logits, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing\n        )\n        loss = loss_logits + self.aux_factor * loss_aux\n        return loss\n
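A minimal sketch of using CrossEntropySmooth directly with sparse labels; values are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.loss.cross_entropy_smooth import CrossEntropySmooth

loss_fn = CrossEntropySmooth(smoothing=0.1, reduction="mean")
logits = Tensor(np.random.randn(4, 10), ms.float32)  # [N, C]
labels = Tensor(np.array([1, 0, 4, 9]), ms.int32)    # class indices, [N]
loss = loss_fn(logits, labels)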
"},{"location":"reference/loss/#binary-cross-entropy","title":"Binary Cross Entropy","text":""},{"location":"reference/loss/#mindcv.loss.binary_cross_entropy_smooth.BinaryCrossEntropySmooth","title":"mindcv.loss.binary_cross_entropy_smooth.BinaryCrossEntropySmooth","text":"

Bases: nn.LossBase

Binary cross entropy loss with label smoothing. Applies the sigmoid activation function to the input logits, and uses the given logits to compute the binary cross entropy between the logits and the label.

PARAMETER DESCRIPTION smoothing

Label smoothing factor, a regularization tool used to prevent the model from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

DEFAULT: 0.0

aux_factor

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs (i.e., deep supervision), like inception_v3. Default: 0.0.

DEFAULT: 0.0

reduction

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

DEFAULT: 'mean'

weight

Class weight. A rescaling weight applied to the loss of each batch element. Shape [C]. It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

TYPE: Tensor DEFAULT: None

pos_weight

Positive weight for each class. A weight of positive examples. Shape [C]. Must be a vector with length equal to the number of classes. It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

TYPE: Tensor DEFAULT: None

Inputs

  • logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes. Or (2) a tuple of two input logits (main_logits and aux_logits) for auxiliary loss.
  • labels (Tensor): Ground truth label. (1) Shape [N, C], the same shape as logits, or (2) shape [N]. Can be a class probability matrix or one-hot labels. Data type must be float16 or float32.

Source code in mindcv/loss/binary_cross_entropy_smooth.py
class BinaryCrossEntropySmooth(nn.LossBase):\n\"\"\"\n    Binary cross entropy loss with label smoothing.\n    Apply sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy\n    between the logits and the label.\n\n    Args:\n        smoothing: Label smoothing factor, a regularization tool used to prevent the model\n            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.\n        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs\n            (i.e., deep supervision), like inception_v3.  Default: 0.0.\n        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.\n        weight (Tensor): Class weight. A rescaling weight applied to the loss of each batch element. Shape [C].\n            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.\n        pos_weight (Tensor): Positive weight for each class. A weight of positive examples. Shape [C].\n            Must be a vector with length equal to the number of classes.\n            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.\n\n    Inputs:\n        logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes.\n            Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss.\n        labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as `logits` or (2) shape [N].\n            can be a class probability matrix or one-hot labels. Data type must be float16 or float32.\n    \"\"\"\n\n    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction=\"mean\", weight=None, pos_weight=None):\n        super().__init__()\n        self.smoothing = smoothing\n        self.aux_factor = aux_factor\n        self.reduction = reduction\n        self.weight = weight\n        self.pos_weight = pos_weight\n        self.ones = P.OnesLike()\n        self.one_hot = P.OneHot()\n\n    def construct(self, logits, labels):\n        loss_aux = 0\n        aux_logits = None\n\n        if isinstance(logits, tuple):\n            main_logits = logits[0]\n        else:\n            main_logits = logits\n\n        if main_logits.size != labels.size:\n            # We must explicitly convert the label to one-hot,\n            # for binary_cross_entropy_with_logits restricting input and label have the same shape.\n            class_dim = 0 if main_logits.ndim == 1 else 1\n            n_classes = main_logits.shape[class_dim]\n            labels = self.one_hot(labels, n_classes, Tensor(1.0), Tensor(0.0))\n\n        ones_input = self.ones(main_logits)\n        if self.weight is not None:\n            weight = self.weight\n        else:\n            weight = ones_input\n        if self.pos_weight is not None:\n            pos_weight = self.pos_weight\n        else:\n            pos_weight = ones_input\n\n        if self.smoothing > 0.0:\n            class_dim = 0 if main_logits.ndim == 1 else -1\n            n_classes = main_logits.shape[class_dim]\n            labels = labels * (1 - self.smoothing) + self.smoothing / n_classes\n\n        if self.aux_factor > 0 and aux_logits is not None:\n            for aux_logits in logits[1:]:\n                loss_aux += F.binary_cross_entropy_with_logits(\n                    aux_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction\n                )\n        # else:\n 
       #    warnings.warn(\"There are logit tuple input, but the auxiliary loss factor is 0.\")\n\n        loss_logits = F.binary_cross_entropy_with_logits(\n            main_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction\n        )\n\n        loss = loss_logits + self.aux_factor * loss_aux\n\n        return loss\n
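A minimal sketch using one-hot labels, which match the shape of the logits; values are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.loss.binary_cross_entropy_smooth import BinaryCrossEntropySmooth

loss_fn = BinaryCrossEntropySmooth(smoothing=0.1)
logits = Tensor(np.random.randn(4, 5), ms.float32)    # [N, C]
labels = Tensor(np.eye(5)[[0, 2, 1, 4]], ms.float32)  # one-hot labels, [N, C]
loss = loss_fn(logits, labels)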
"},{"location":"reference/models.layers/","title":"Common Layers in Model","text":""},{"location":"reference/models.layers/#activation","title":"Activation","text":""},{"location":"reference/models.layers/#mindcv.models.layers.activation.Swish","title":"mindcv.models.layers.activation.Swish","text":"

Bases: nn.Cell

Swish activation function: x * sigmoid(x).

Return

Tensor

Example

x = Tensor(((20, 16), (50, 50)), mindspore.float32)
Swish()(x)

Source code in mindcv/models/layers/activation.py
class Swish(nn.Cell):\n\"\"\"\n    Swish activation function: x * sigmoid(x).\n\n    Args:\n        None\n\n    Return:\n        Tensor\n\n    Example:\n        >>> x = Tensor(((20, 16), (50, 50)), mindspore.float32)\n        >>> Swish()(x)\n    \"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self.result = None\n        self.sigmoid = nn.Sigmoid()\n\n    def construct(self, x):\n        result = x * self.sigmoid(x)\n        return result\n
"},{"location":"reference/models.layers/#droppath","title":"DropPath","text":""},{"location":"reference/models.layers/#mindcv.models.layers.drop_path.DropPath","title":"mindcv.models.layers.drop_path.DropPath","text":"

Bases: nn.Cell

DropPath (Stochastic Depth) regularization layers

Source code in mindcv/models/layers/drop_path.py
class DropPath(nn.Cell):\n\"\"\"DropPath (Stochastic Depth) regularization layers\"\"\"\n\n    def __init__(\n        self,\n        drop_prob: float = 0.0,\n        scale_by_keep: bool = True,\n    ) -> None:\n        super().__init__()\n        self.keep_prob = 1.0 - drop_prob\n        self.scale_by_keep = scale_by_keep\n        self.dropout = Dropout(p=drop_prob)\n\n    def construct(self, x: Tensor) -> Tensor:\n        if self.keep_prob == 1.0 or not self.training:\n            return x\n        shape = (x.shape[0],) + (1,) * (x.ndim - 1)\n        random_tensor = self.dropout(ones(shape))\n        if not self.scale_by_keep:\n            random_tensor = ops.mul(random_tensor, self.keep_prob)\n        return x * random_tensor\n
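A minimal sketch; stochastic depth is only active in training mode, so set_train(True) is called explicitly here. Shapes are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.models.layers.drop_path import DropPath

drop_path = DropPath(drop_prob=0.2)
drop_path.set_train(True)  # in evaluation mode the input is returned unchanged
x = Tensor(np.random.randn(8, 196, 768), ms.float32)
y = drop_path(x)           # per-sample paths are zeroed and the rest rescaled by 1/keep_prob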
"},{"location":"reference/models.layers/#identity","title":"Identity","text":""},{"location":"reference/models.layers/#mindcv.models.layers.identity.Identity","title":"mindcv.models.layers.identity.Identity","text":"

Bases: nn.Cell

Identity

Source code in mindcv/models/layers/identity.py
class Identity(nn.Cell):\n\"\"\"Identity\"\"\"\n\n    def construct(self, x):\n        return x\n
"},{"location":"reference/models.layers/#mlp","title":"MLP","text":""},{"location":"reference/models.layers/#mindcv.models.layers.mlp.Mlp","title":"mindcv.models.layers.mlp.Mlp","text":"

Bases: nn.Cell

Source code in mindcv/models/layers/mlp.py
class Mlp(nn.Cell):\n    def __init__(\n        self,\n        in_features: int,\n        hidden_features: Optional[int] = None,\n        out_features: Optional[int] = None,\n        act_layer: Optional[nn.Cell] = nn.GELU,\n        drop: float = 0.0,\n    ) -> None:\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n        self.fc1 = nn.Dense(in_channels=in_features, out_channels=hidden_features, has_bias=True)\n        self.act = act_layer()\n        self.fc2 = nn.Dense(in_channels=hidden_features, out_channels=out_features, has_bias=True)\n        self.drop = Dropout(p=drop)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop(x)\n        x = self.fc2(x)\n        x = self.drop(x)\n        return x\n
"},{"location":"reference/models.layers/#patch-embedding","title":"Patch Embedding","text":""},{"location":"reference/models.layers/#mindcv.models.layers.patch_embed.PatchEmbed","title":"mindcv.models.layers.patch_embed.PatchEmbed","text":"

Bases: nn.Cell

Image to Patch Embedding

PARAMETER DESCRIPTION image_size

Image size. Default: 224.

TYPE: int DEFAULT: 224

patch_size

Patch token size. Default: 4.

TYPE: int DEFAULT: 4

in_chans

Number of input image channels. Default: 3.

TYPE: int DEFAULT: 3

embed_dim

Number of linear projection output channels. Default: 96.

TYPE: int DEFAULT: 96

norm_layer

Normalization layer. Default: None

TYPE: nn.Cell DEFAULT: None

Source code in mindcv/models/layers/patch_embed.py
class PatchEmbed(nn.Cell):\n\"\"\"Image to Patch Embedding\n\n    Args:\n        image_size (int): Image size.  Default: 224.\n        patch_size (int): Patch token size. Default: 4.\n        in_chans (int): Number of input image channels. Default: 3.\n        embed_dim (int): Number of linear projection output channels. Default: 96.\n        norm_layer (nn.Cell, optional): Normalization layer. Default: None\n    \"\"\"\n\n    def __init__(\n        self,\n        image_size: int = 224,\n        patch_size: int = 4,\n        in_chans: int = 3,\n        embed_dim: int = 96,\n        norm_layer: Optional[nn.Cell] = None,\n    ) -> None:\n        super().__init__()\n        image_size = to_2tuple(image_size)\n        patch_size = to_2tuple(patch_size)\n        patches_resolution = [image_size[0] // patch_size[0], image_size[1] // patch_size[1]]\n        self.image_size = image_size\n        self.patch_size = patch_size\n        self.patches_resolution = patches_resolution\n        self.num_patches = patches_resolution[0] * patches_resolution[1]\n\n        self.in_chans = in_chans\n        self.embed_dim = embed_dim\n\n        self.proj = nn.Conv2d(in_channels=in_chans, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size,\n                              pad_mode='pad', has_bias=True, weight_init=\"TruncatedNormal\")\n\n        if norm_layer is not None:\n            if isinstance(embed_dim, int):\n                embed_dim = (embed_dim,)\n            self.norm = norm_layer(embed_dim, epsilon=1e-5)\n        else:\n            self.norm = None\n\n    def construct(self, x: Tensor) -> Tensor:\n\"\"\"docstring\"\"\"\n        B = x.shape[0]\n        # FIXME look at relaxing size constraints\n        x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C\n        x = ops.Transpose()(x, (0, 2, 1))\n\n        if self.norm is not None:\n            x = self.norm(x)\n        return x\n
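A minimal sketch with the default settings (224x224 input, 4x4 patches, 96-dim embedding); shapes are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.models.layers.patch_embed import PatchEmbed

patch_embed = PatchEmbed(image_size=224, patch_size=4, in_chans=3, embed_dim=96)
x = Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
tokens = patch_embed(x)  # shape (2, 3136, 96): one 96-dim token per 4x4 patch (56 * 56 = 3136 patches)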
"},{"location":"reference/models.layers/#mindcv.models.layers.patch_embed.PatchEmbed.construct","title":"mindcv.models.layers.patch_embed.PatchEmbed.construct(x)","text":"

docstring

Source code in mindcv/models/layers/patch_embed.py
def construct(self, x: Tensor) -> Tensor:\n\"\"\"docstring\"\"\"\n    B = x.shape[0]\n    # FIXME look at relaxing size constraints\n    x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C\n    x = ops.Transpose()(x, (0, 2, 1))\n\n    if self.norm is not None:\n        x = self.norm(x)\n    return x\n
"},{"location":"reference/models.layers/#pooling","title":"Pooling","text":""},{"location":"reference/models.layers/#mindcv.models.layers.pooling.GlobalAvgPooling","title":"mindcv.models.layers.pooling.GlobalAvgPooling","text":"

Bases: nn.Cell

GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1

Source code in mindcv/models/layers/pooling.py
class GlobalAvgPooling(nn.Cell):\n\"\"\"\n    GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1\n    \"\"\"\n\n    def __init__(self, keep_dims: bool = False) -> None:\n        super().__init__()\n        self.keep_dims = keep_dims\n\n    def construct(self, x):\n        x = ops.mean(x, axis=(2, 3), keep_dims=self.keep_dims)\n        return x\n
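A minimal sketch; shapes are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.models.layers.pooling import GlobalAvgPooling

pool = GlobalAvgPooling(keep_dims=False)
x = Tensor(np.random.randn(2, 512, 7, 7), ms.float32)
y = pool(x)  # shape (2, 512): mean over the spatial H and W axes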
"},{"location":"reference/models.layers/#selective-kernel","title":"Selective Kernel","text":""},{"location":"reference/models.layers/#mindcv.models.layers.selective_kernel.SelectiveKernelAttn","title":"mindcv.models.layers.selective_kernel.SelectiveKernelAttn","text":"

Bases: nn.Cell

Selective Kernel Attention Module. The Selective Kernel attention mechanism factored out into its own module.

Source code in mindcv/models/layers/selective_kernel.py
class SelectiveKernelAttn(nn.Cell):\n\"\"\"Selective Kernel Attention Module\n    Selective Kernel attention mechanism factored out into its own module.\n    \"\"\"\n\n    def __init__(\n        self,\n        channels: int,\n        num_paths: int = 2,\n        attn_channels: int = 32,\n        activation: Optional[nn.Cell] = nn.ReLU,\n        norm: Optional[nn.Cell] = nn.BatchNorm2d,\n    ):\n        super().__init__()\n        self.num_paths = num_paths\n        self.mean = GlobalAvgPooling(keep_dims=True)\n        self.fc_reduce = nn.Conv2d(channels, attn_channels, kernel_size=1, has_bias=False)\n        self.bn = norm(attn_channels)\n        self.act = activation()\n        self.fc_select = nn.Conv2d(attn_channels, channels * num_paths, kernel_size=1)\n        self.softmax = nn.Softmax(axis=1)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.mean((x.sum(1)))\n        x = self.fc_reduce(x)\n        x = self.bn(x)\n        x = self.act(x)\n        x = self.fc_select(x)\n        b, c, h, w = x.shape\n        x = x.reshape((b, self.num_paths, c // self.num_paths, h, w))\n        x = self.softmax(x)\n        return x\n
"},{"location":"reference/models.layers/#mindcv.models.layers.selective_kernel.SelectiveKernel","title":"mindcv.models.layers.selective_kernel.SelectiveKernel","text":"

Bases: nn.Cell

Selective Kernel Convolution Module, as described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications. The largest change is the input split, which divides the input channels across each convolution path; this can be viewed as a grouping of sorts, but the output channel counts expand to the module-level value. This keeps the parameter count from ballooning when the convolutions themselves don't have groups, but still provides a noteworthy increase in performance over similar-param-count models without this attention layer. -Ross W

PARAMETER DESCRIPTION in_channels

module input (feature) channel count

TYPE: int

out_channels

module output (feature) channel count

TYPE: int DEFAULT: None

kernel_size

kernel size for each convolution branch

TYPE: (int, list) DEFAULT: None

stride

stride for convolutions

TYPE: int DEFAULT: 1

dilation

dilation for module as a whole, impacts dilation of each branch

TYPE: int DEFAULT: 1

groups

number of groups for each branch

TYPE: int DEFAULT: 1

rd_ratio

reduction factor for attention features

TYPE: (int, float) DEFAULT: 1.0 / 16

rd_channels(int)

reduction channels can be specified directly by arg (if rd_channels is set)

rd_divisor(int)

divisor can be specified to keep the reduced channel count divisible by it (channels % div == 0). Default: 8.

keep_3x3

keep all branch convolution kernels as 3x3, changing larger kernels for dilations

TYPE: bool DEFAULT: True

split_input

split input channels evenly across the convolution branches; this keeps the parameter count lower and can be viewed as grouping by path, while the output expands to the module out_channels count.

TYPE: bool DEFAULT: True

activation

activation layer to use

TYPE: nn.Module DEFAULT: nn.ReLU

norm

batchnorm/norm layer to use

TYPE: nn.Module DEFAULT: nn.BatchNorm2d

Source code in mindcv/models/layers/selective_kernel.py
class SelectiveKernel(nn.Cell):\n\"\"\"Selective Kernel Convolution Module\n    As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications.\n    Largest change is the input split, which divides the input channels across each convolution path, this can\n    be viewed as a grouping of sorts, but the output channel counts expand to the module level value. This keeps\n    the parameter count from ballooning when the convolutions themselves don't have groups, but still provides\n    a noteworthy increase in performance over similar param count models without this attention layer. -Ross W\n    Args:\n        in_channels (int):  module input (feature) channel count\n        out_channels (int):  module output (feature) channel count\n        kernel_size (int, list): kernel size for each convolution branch\n        stride (int): stride for convolutions\n        dilation (int): dilation for module as a whole, impacts dilation of each branch\n        groups (int): number of groups for each branch\n        rd_ratio (int, float): reduction factor for attention features\n        rd_channels(int): reduction channels can be specified directly by arg (if rd_channels is set)\n        rd_divisor(int): divisor can be specified to keep channels\n        keep_3x3 (bool): keep all branch convolution kernels as 3x3, changing larger kernels for dilations\n        split_input (bool): split input channels evenly across each convolution branch, keeps param count lower,\n            can be viewed as grouping by path, output expands to module out_channels count\n        activation (nn.Module): activation layer to use\n        norm (nn.Module): batchnorm/norm layer to use\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        out_channels: Optional[int] = None,\n        kernel_size: Optional[Union[int, List]] = None,\n        stride: int = 1,\n        dilation: int = 1,\n        groups: int = 1,\n        rd_ratio: float = 1.0 / 16,\n        rd_channels: Optional[int] = None,\n        rd_divisor: int = 8,\n        keep_3x3: bool = True,\n        split_input: bool = True,\n        activation: Optional[nn.Cell] = nn.ReLU,\n        norm: Optional[nn.Cell] = nn.BatchNorm2d,\n    ):\n        super().__init__()\n        out_channels = out_channels or in_channels\n        kernel_size = kernel_size or [3, 5]  # default to one 3x3 and one 5x5 branch. 
5x5 -> 3x3 + dilation\n        _kernel_valid(kernel_size)\n        if not isinstance(kernel_size, list):\n            kernel_size = [kernel_size] * 2\n        if keep_3x3:\n            dilation = [dilation * (k - 1) // 2 for k in kernel_size]\n            kernel_size = [3] * len(kernel_size)\n        else:\n            dilation = [dilation] * len(kernel_size)\n        self.num_paths = len(kernel_size)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.split_input = split_input\n        if self.split_input:\n            assert in_channels % self.num_paths == 0\n            in_channels = in_channels // self.num_paths\n        groups = min(out_channels, groups)\n        self.split = Split(split_size_or_sections=self.in_channels // self.num_paths, output_num=self.num_paths, axis=1)\n\n        self.paths = nn.CellList([\n            Conv2dNormActivation(in_channels, out_channels, kernel_size=k, stride=stride, groups=groups,\n                                 dilation=d, activation=activation, norm=norm)\n            for k, d in zip(kernel_size, dilation)\n        ])\n\n        attn_channels = rd_channels or make_divisible(out_channels * rd_ratio, divisor=rd_divisor)\n        self.attn = SelectiveKernelAttn(out_channels, self.num_paths, attn_channels)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x_paths = []\n        if self.split_input:\n            x_split = self.split(x)\n            for i, op in enumerate(self.paths):\n                x_paths.append(op(x_split[i]))\n        else:\n            for op in self.paths:\n                x_paths.append(op(x))\n\n        x = ops.stack(x_paths, axis=1)\n        x_attn = self.attn(x)\n        x = x * x_attn\n        x = x.sum(1)\n        return x\n
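A minimal sketch with the default two-branch configuration; in_channels must be divisible by the number of branches when split_input=True, and the shapes are illustrative.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.models.layers.selective_kernel import SelectiveKernel

sk = SelectiveKernel(in_channels=64, out_channels=64, stride=1)
x = Tensor(np.random.randn(2, 64, 56, 56), ms.float32)
y = sk(x)  # shape (2, 64, 56, 56): attention-weighted sum over the two convolution branches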
"},{"location":"reference/models.layers/#squeeze-and-excite","title":"Squeeze and Excite","text":""},{"location":"reference/models.layers/#mindcv.models.layers.squeeze_excite.SqueezeExcite","title":"mindcv.models.layers.squeeze_excite.SqueezeExcite","text":"

Bases: nn.Cell

SqueezeExcite Module as defined in original SE-Nets with a few additions.

Additions include
  • divisor can be specified to keep channels % div == 0 (default: 8)
  • reduction channels can be specified directly by arg (if rd_channels is set)
  • reduction channels can be specified by float rd_ratio (default: 1/16)
  • customizable activation, normalization, and gate layer
Source code in mindcv/models/layers/squeeze_excite.py
class SqueezeExcite(nn.Cell):\n\"\"\"SqueezeExcite Module as defined in original SE-Nets with a few additions.\n    Additions include:\n        * divisor can be specified to keep channels % div == 0 (default: 8)\n        * reduction channels can be specified directly by arg (if rd_channels is set)\n        * reduction channels can be specified by float rd_ratio (default: 1/16)\n        * customizable activation, normalization, and gate layer\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        rd_ratio: float = 1.0 / 16,\n        rd_channels: Optional[int] = None,\n        rd_divisor: int = 8,\n        norm: Optional[nn.Cell] = None,\n        act_layer: nn.Cell = nn.ReLU,\n        gate_layer: nn.Cell = nn.Sigmoid,\n    ) -> None:\n        super().__init__()\n        self.norm = norm\n        self.act = act_layer()\n        self.gate = gate_layer()\n        if not rd_channels:\n            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)\n\n        self.conv_reduce = nn.Conv2d(\n            in_channels=in_channels,\n            out_channels=rd_channels,\n            kernel_size=1,\n            has_bias=True,\n        )\n        if self.norm:\n            self.bn = nn.BatchNorm2d(rd_channels)\n        self.conv_expand = nn.Conv2d(\n            in_channels=rd_channels,\n            out_channels=in_channels,\n            kernel_size=1,\n            has_bias=True,\n        )\n        self.pool = GlobalAvgPooling(keep_dims=True)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x_se = self.pool(x)\n        x_se = self.conv_reduce(x_se)\n        if self.norm:\n            x_se = self.bn(x_se)\n        x_se = self.act(x_se)\n        x_se = self.conv_expand(x_se)\n        x_se = self.gate(x_se)\n        x = x * x_se\n        return x\n
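A minimal sketch; shapes are illustrative. The output has the same shape as the input, with channels re-weighted by the learned gate.

import numpy as np
import mindspore as ms
from mindspore import Tensor
from mindcv.models.layers.squeeze_excite import SqueezeExcite

se = SqueezeExcite(in_channels=64, rd_ratio=1.0 / 16)
x = Tensor(np.random.randn(2, 64, 28, 28), ms.float32)
y = se(x)  # shape (2, 64, 28, 28)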
"},{"location":"reference/models.layers/#mindcv.models.layers.squeeze_excite.SqueezeExciteV2","title":"mindcv.models.layers.squeeze_excite.SqueezeExciteV2","text":"

Bases: nn.Cell

SqueezeExcite module as defined in the original SE-Nets, with a few additions. V1 uses a 1x1 convolution in place of the fully connected layers, while V2 uses nn.Dense directly.

Source code in mindcv/models/layers/squeeze_excite.py
class SqueezeExciteV2(nn.Cell):\n\"\"\"SqueezeExcite Module as defined in original SE-Nets with a few additions.\n    V1 uses 1x1conv to replace fc layers, and V2 uses nn.Dense to implement directly.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        rd_ratio: float = 1.0 / 16,\n        rd_channels: Optional[int] = None,\n        rd_divisor: int = 8,\n        norm: Optional[nn.Cell] = None,\n        act_layer: nn.Cell = nn.ReLU,\n        gate_layer: nn.Cell = nn.Sigmoid,\n    ) -> None:\n        super().__init__()\n        self.norm = norm\n        self.act = act_layer()\n        self.gate = gate_layer()\n        if not rd_channels:\n            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)\n\n        self.conv_reduce = nn.Dense(\n            in_channels=in_channels,\n            out_channels=rd_channels,\n            has_bias=True,\n        )\n        if self.norm:\n            self.bn = nn.BatchNorm2d(rd_channels)\n        self.conv_expand = nn.Dense(\n            in_channels=rd_channels,\n            out_channels=in_channels,\n            has_bias=True,\n        )\n        self.pool = GlobalAvgPooling(keep_dims=False)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x_se = self.pool(x)\n        x_se = self.conv_reduce(x_se)\n        if self.norm:\n            x_se = self.bn(x_se)\n        x_se = self.act(x_se)\n        x_se = self.conv_expand(x_se)\n        x_se = self.gate(x_se)\n        x_se = ops.expand_dims(x_se, -1)\n        x_se = ops.expand_dims(x_se, -1)\n        x = x * x_se\n        return x\n
"},{"location":"reference/models/","title":"Models","text":""},{"location":"reference/models/#create-model","title":"Create Model","text":""},{"location":"reference/models/#mindcv.models.model_factory.create_model","title":"mindcv.models.model_factory.create_model(model_name, num_classes=1000, pretrained=False, in_channels=3, checkpoint_path='', ema=False, auto_mapping=False, **kwargs)","text":"

Creates model by name.

PARAMETER DESCRIPTION model_name

The name of the model.

TYPE: str

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

pretrained

Whether to load the pretrained model. Default: False.

TYPE: bool DEFAULT: False

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

checkpoint_path

The path of checkpoint files. Default: \"\".

TYPE: str DEFAULT: ''

ema

Whether to use the EMA (exponential moving average) weights when loading a checkpoint. Default: False.

TYPE: bool DEFAULT: False

auto_mapping

Whether to automatically map the names of checkpoint weights to the names of model weights when there are differences in names. Default: False.

TYPE: bool DEFAULT: False

Source code in mindcv/models/model_factory.py
def create_model(\n    model_name: str,\n    num_classes: int = 1000,\n    pretrained: bool = False,\n    in_channels: int = 3,\n    checkpoint_path: str = \"\",\n    ema: bool = False,\n    auto_mapping: bool = False,\n    **kwargs,\n):\nr\"\"\"Creates model by name.\n\n    Args:\n        model_name (str):  The name of model.\n        num_classes (int): The number of classes. Default: 1000.\n        pretrained (bool): Whether to load the pretrained model. Default: False.\n        in_channels (int): The input channels. Default: 3.\n        checkpoint_path (str): The path of checkpoint files. Default: \"\".\n        ema (bool): Whether use ema method. Default: False.\n        auto_mapping (bool): Whether to automatically map the names of checkpoint weights\n            to the names of model weights when there are differences in names. Default: False.\n    \"\"\"\n\n    if checkpoint_path != \"\" and pretrained:\n        raise ValueError(\"checkpoint_path is mutually exclusive with pretrained\")\n\n    model_args = dict(num_classes=num_classes, pretrained=pretrained, in_channels=in_channels)\n    kwargs = {k: v for k, v in kwargs.items() if v is not None}\n\n    if not is_model(model_name):\n        raise RuntimeError(f\"Unknown model {model_name}\")\n\n    create_fn = model_entrypoint(model_name)\n    model = create_fn(**model_args, **kwargs)\n\n    if checkpoint_path:\n        load_model_checkpoint(model, checkpoint_path, ema, auto_mapping)\n\n    return model\n
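A short example of building a registered model by name; the model name and settings are illustrative. Note that pretrained and checkpoint_path are mutually exclusive, as enforced in the source above.

from mindcv.models import create_model

model = create_model("resnet50", num_classes=1000, pretrained=False)
model.set_train(False)  # switch to evaluation mode before inference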
"},{"location":"reference/models/#bit","title":"bit","text":""},{"location":"reference/models/#mindcv.models.bit.BiT_ResNet","title":"mindcv.models.bit.BiT_ResNet","text":"

Bases: nn.Cell

BiT_ResNet model class, based on "Big Transfer (BiT): General Visual Representation Learning" (https://arxiv.org/abs/1912.11370).

PARAMETER DESCRIPTION block(Union[Bottleneck])

block of BiT_ResNetv2.

layers(tuple(int))

number of layers of each stage.

wf(int)

width of each layer. Default: 1.

num_classes(int)

number of classification classes. Default: 1000.

in_channels(int)

number of channels of the input. Default: 3.

groups(int)

number of groups for group conv in blocks. Default: 1.

base_width(int)

base width of per-group hidden channels in blocks. Default: 64.

norm(nn.Cell)

normalization layer in blocks. Default: None.

Source code in mindcv/models/bit.py
class BiT_ResNet(nn.Cell):\nr\"\"\"BiT_ResNet model class, based on\n    `\"Big Transfer (BiT): General Visual Representation Learning\" <https://arxiv.org/abs/1912.11370>`_\n    Args:\n        block(Union[Bottleneck]): block of BiT_ResNetv2.\n        layers(tuple(int)): number of layers of each stage.\n        wf(int): width of each layer. Default: 1.\n        num_classes(int): number of classification classes. Default: 1000.\n        in_channels(int): number the channels of the input. Default: 3.\n        groups(int): number of groups for group conv in blocks. Default: 1.\n        base_width(int): base width of pre group hidden channel in blocks. Default: 64.\n        norm(nn.Cell): normalization layer in blocks. Default: None.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[Union[Bottleneck]],\n        layers: List[int],\n        wf: int = 1,\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        groups: int = 1,\n        base_width: int = 64,\n        norm: Optional[nn.Cell] = None,\n    ) -> None:\n        super().__init__()\n\n        if norm is None:\n            norm = nn.GroupNorm\n\n        self.norm: nn.Cell = norm  # add type hints to make pylint happy\n        self.input_channels = 64 * wf\n        self.groups = groups\n        self.base_with = base_width\n\n        self.conv1 = StdConv2d(in_channels, self.input_channels, kernel_size=7,\n                               stride=2, pad_mode=\"pad\", padding=3)\n        self.pad = nn.ConstantPad2d(1, 0)\n        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"valid\")\n\n        self.layer1 = self._make_layer(block, 64 * wf, layers[0])\n        self.layer2 = self._make_layer(block, 128 * wf, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, 256 * wf, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, 512 * wf, layers[3], stride=2)\n\n        self.gn = norm(32, 2048 * wf)\n        self.relu = nn.ReLU()\n        self.pool = GlobalAvgPooling(keep_dims=True)\n        self.classifier = nn.Conv2d(512 * block.expansion * wf, num_classes, kernel_size=1, has_bias=True)\n\n    def _make_layer(\n        self,\n        block: Type[Union[Bottleneck]],\n        channels: int,\n        block_nums: int,\n        stride: int = 1,\n    ) -> nn.SequentialCell:\n\"\"\"build model depending on cfgs\"\"\"\n        down_sample = None\n\n        if stride != 1 or self.input_channels != channels * block.expansion:\n            down_sample = nn.SequentialCell([\n                StdConv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),\n            ])\n\n        layers = []\n        layers.append(\n            block(\n                self.input_channels,\n                channels,\n                stride=stride,\n                down_sample=down_sample,\n                groups=self.groups,\n                base_width=self.base_with,\n                norm=self.norm,\n            )\n        )\n        self.input_channels = channels * block.expansion\n\n        for _ in range(1, block_nums):\n            layers.append(\n                block(\n                    self.input_channels,\n                    channels,\n                    groups=self.groups,\n                    base_width=self.base_with,\n                    norm=self.norm,\n                )\n            )\n\n        return nn.SequentialCell(layers)\n\n    def root(self, x: Tensor) -> Tensor:\n        x = self.conv1(x)\n        x = self.pad(x)\n        x = self.max_pool(x)\n 
       return x\n\n    def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.gn(x)\n        x = self.relu(x)\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.root(x)\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        assert x.shape[-2:] == (1, 1)  # We should have no spatial shape left.\n        return x[..., 0, 0]\n
"},{"location":"reference/models/#mindcv.models.bit.BiT_ResNet.forward_features","title":"mindcv.models.bit.BiT_ResNet.forward_features(x)","text":"

Network forward feature extraction.

Source code in mindcv/models/bit.py
def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n    x = self.layer1(x)\n    x = self.layer2(x)\n    x = self.layer3(x)\n    x = self.layer4(x)\n    return x\n
"},{"location":"reference/models/#mindcv.models.bit.BiT_resnet101","title":"mindcv.models.bit.BiT_resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 101-layer ResNet model. Refer to the base class models.BiT_ResNet for more details.

Source code in mindcv/models/bit.py
@register_model\ndef BiT_resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 101 layers ResNet model.\n    Refer to the base class `models.BiT_Resnet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"BiT_resnet101\"]\n    model = BiT_ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.bit.BiT_resnet50","title":"mindcv.models.bit.BiT_resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer ResNet model. Refer to the base class models.BiT_ResNet for more details.

Source code in mindcv/models/bit.py
@register_model\ndef BiT_resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 50 layers ResNet model.\n    Refer to the base class `models.BiT_Resnet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"BiT_resnet50\"]\n    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
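A usage sketch, not taken from the library docs: building BiT_resnet50 with a custom classification head and running a dummy forward pass (assumes mindcv is installed).

import numpy as np
import mindspore as ms
from mindcv.models import BiT_resnet50

net = BiT_resnet50(pretrained=False, num_classes=10, in_channels=3)
logits = net(ms.Tensor(np.ones((2, 3, 224, 224)), ms.float32))
print(logits.shape)   # expected: (2, 10); the 1x1-conv head is squeezed to (N, num_classes)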
"},{"location":"reference/models/#mindcv.models.bit.BiT_resnet50x3","title":"mindcv.models.bit.BiT_resnet50x3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer ResNet model with width factor 3. Refer to the base class models.BiT_ResNet for more details.

Source code in mindcv/models/bit.py
@register_model\ndef BiT_resnet50x3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 50 layers ResNet model.\n     Refer to the base class `models.BiT_Resnet` for more details.\n     \"\"\"\n    default_cfg = default_cfgs[\"BiT_resnet50x3\"]\n    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], wf=3, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#cait","title":"cait","text":""},{"location":"reference/models/#mindcv.models.cait.CaiT","title":"mindcv.models.cait.CaiT","text":"

Bases: nn.Cell

Source code in mindcv/models/cait.py
class CaiT(nn.Cell):\n    def __init__(self,\n                 img_size: int = 224,\n                 patch_size: int = 16,\n                 in_channels: int = 3,\n                 num_classes: int = 1000,\n                 embed_dim: int = 768,\n                 depth: int = 12,\n                 num_heads: int = 12,\n                 mlp_ratio: float = 4.,\n                 qkv_bias: bool = False,\n                 qk_scale: float = None,\n                 drop_rate: float = 0.,\n                 attn_drop_rate: float = 0.,\n                 drop_path_rate: float = 0.,\n                 norm_layer: nn.Cell = nn.LayerNorm,\n                 act_layer: nn.Cell = nn.GELU,\n                 init_values: float = 1e-4,\n                 depth_token_only: int = 2,\n                 mlp_ratio_clstk: float = 4.0) -> None:\n        super(CaiT, self).__init__()\n        self.num_classes = num_classes\n        self.embed_dim = embed_dim\n\n        self.patch_embed = PatchEmbed(image_size=img_size,\n                                      patch_size=patch_size,\n                                      in_chans=in_channels,\n                                      embed_dim=embed_dim)\n\n        num_patches = self.patch_embed.num_patches\n\n        zeros = ops.Zeros()\n        self.cls_token = Parameter(zeros((1, 1, embed_dim), ms.float32))\n        self.pos_embed = Parameter(zeros((1, num_patches, embed_dim), ms.float32))\n        self.pos_drop = Dropout(p=drop_rate)\n\n        dpr = [drop_path_rate for i in range(depth)]\n\n        self.blocks = []\n        self.blocks_token_only = []\n\n        self.blocks = nn.CellList([\n            LayerScaleBlockSA(\n                dim=embed_dim,\n                num_heads=num_heads,\n                mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=drop_rate,\n                attn_drop_rate=attn_drop_rate,\n                drop_path_rate=dpr[i],\n                act_layer=act_layer,\n                norm_layer=norm_layer,\n                init_values=init_values)\n            for i in range(depth)])\n        self.blocks_token_only = nn.CellList([\n            LayerScaleBlockCA(\n                dim=embed_dim,\n                num_heads=num_heads,\n                mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=0.0,\n                attn_drop_rate=0.0,\n                drop_path_rate=0.0,\n                act_layer=act_layer,\n                norm_layer=norm_layer,\n                init_values=init_values)\n            for i in range(depth_token_only)])\n\n        self.norm = norm_layer((embed_dim,))\n\n        self.head = nn.Dense(embed_dim, num_classes) if num_classes > 0 else nn.Identity()\n\n        self.pos_embed = init.initializer(TruncatedNormal(sigma=0.02), self.pos_embed.shape, ms.float32)\n        self.cls_token = init.initializer(TruncatedNormal(sigma=0.02), self.cls_token.shape, ms.float32)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Dense):\n                m.weight = init.initializer(TruncatedNormal(sigma=0.02), m.weight.shape, ms.float32)\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n            elif isinstance(m, nn.LayerNorm):\n                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))\n 
               m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        B = x.shape[0]\n        x = self.patch_embed(x)\n\n        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))\n\n        x = x + self.pos_embed\n        x = self.pos_drop(x)\n\n        for i , blk in enumerate(self.blocks):\n            x = blk(x)\n        for i , blk in enumerate(self.blocks_token_only):\n            cls_tokens = blk(x, cls_tokens)\n\n        x = ops.concat((cls_tokens, x), axis=1)\n\n        x = self.norm(x)\n        return x[:, 0]\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.head(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.cait.cait_m36_384","title":"mindcv.models.cait.cait_m36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_m36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=768, depth=36, num_heads=16, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_m48_448","title":"mindcv.models.cait.cait_m48_448(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_m48_448(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=448, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=768, depth=48, num_heads=16, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_s24_224","title":"mindcv.models.cait.cait_s24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_s24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_s24_384","title":"mindcv.models.cait.cait_s24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_s24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_s36_384","title":"mindcv.models.cait.cait_s36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_s36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=384, depth=36, num_heads=8, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_xs24_384","title":"mindcv.models.cait.cait_xs24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_xs24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=288, depth=24, num_heads=6, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.cait.cait_xxs24_224","title":"mindcv.models.cait.cait_xxs24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/cait.py
@register_model\ndef cait_xxs24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:\n    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,\n                 embed_dim=192, depth=24, num_heads=4, mlp_ratio=4, qkv_bias=False,\n                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,\n                 **kwargs)\n\n    if pretrained:\n        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
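Illustrative sketch (assumes mindcv exposes the registered CaiT factories); the input resolution should match the img_size the variant was built for, 224 here.

import numpy as np
import mindspore as ms
from mindcv.models import cait_xxs24_224

net = cait_xxs24_224(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)   # must match img_size=224
print(net(x).shape)   # expected: (1, 1000)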
"},{"location":"reference/models/#cmt","title":"cmt","text":""},{"location":"reference/models/#mindcv.models.cmt.CMT","title":"mindcv.models.cmt.CMT","text":"

Bases: nn.Cell

Source code in mindcv/models/cmt.py
class CMT(nn.Cell):\n    def __init__(\n        self,\n        img_size=224,\n        in_channels=3,\n        num_classes=1000,\n        embed_dims=None,\n        stem_channel=16,\n        fc_dim=1280,\n        num_heads=None,\n        mlp_ratios=None,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.0,\n        attn_drop_rate=0.0,\n        drop_path_rate=0.0,\n        norm_layer=None,\n        depths=None,\n        qk_ratio=1,\n        sr_ratios=None,\n    ):\n        super().__init__()\n        self.num_classes = num_classes\n        self.num_features = self.embed_dim = embed_dims[-1]\n        norm_layer = norm_layer or nn.LayerNorm\n\n        self.stem_conv1 = nn.Conv2d(\n            3, stem_channel, kernel_size=3, stride=2, pad_mode='pad', padding=1, has_bias=True)\n        self.stem_relu1 = nn.GELU()\n        self.stem_norm1 = nn.BatchNorm2d(stem_channel)\n\n        self.stem_conv2 = nn.Conv2d(\n            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)\n        self.stem_relu2 = nn.GELU()\n        self.stem_norm2 = nn.BatchNorm2d(stem_channel)\n\n        self.stem_conv3 = nn.Conv2d(\n            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)\n        self.stem_relu3 = nn.GELU()\n        self.stem_norm3 = nn.BatchNorm2d(stem_channel)\n\n        self.patch_embed_a = PatchEmbed(\n            img_size=img_size // 2, patch_size=2, in_chans=stem_channel, embed_dim=embed_dims[0])\n        self.patch_embed_b = PatchEmbed(\n            img_size=img_size // 4, patch_size=2, in_chans=embed_dims[0], embed_dim=embed_dims[1])\n        self.patch_embed_c = PatchEmbed(\n            img_size=img_size // 8, patch_size=2, in_chans=embed_dims[1], embed_dim=embed_dims[2])\n        self.patch_embed_d = PatchEmbed(\n            img_size=img_size // 16, patch_size=2, in_chans=embed_dims[2], embed_dim=embed_dims[3])\n\n        self.relative_pos_a = ops.zeros(\n            (num_heads[0], self.patch_embed_a.num_patches,\n             self.patch_embed_a.num_patches // sr_ratios[0] // sr_ratios[0]),\n            mindspore.float32)\n        self.relative_pos_b = ops.zeros(\n            (num_heads[1], self.patch_embed_b.num_patches,\n             self.patch_embed_b.num_patches // sr_ratios[1] // sr_ratios[1]),\n            mindspore.float32)\n        self.relative_pos_c = ops.zeros(\n            (num_heads[2], self.patch_embed_c.num_patches,\n             self.patch_embed_c.num_patches // sr_ratios[2] // sr_ratios[2]),\n            mindspore.float32)\n        self.relative_pos_d = ops.zeros(\n            (num_heads[3], self.patch_embed_d.num_patches,\n             self.patch_embed_d.num_patches // sr_ratios[3] // sr_ratios[3]),\n            mindspore.float32)\n\n        # stochastic depth decay rule\n        dpr = [x.item() for x in np.linspace(0, drop_path_rate, sum(depths))]\n        cur = 0\n        self.blocks_a = nn.CellList([\n            Block(\n                dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias,\n                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[\n                    cur + i],\n                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[0])\n            for i in range(depths[0])])\n        cur += depths[0]\n        self.blocks_b = nn.CellList([\n            Block(\n                dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias,\n                
qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[\n                    cur + i],\n                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[1])\n            for i in range(depths[1])])\n        cur += depths[1]\n        self.blocks_c = nn.CellList([\n            Block(\n                dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias,\n                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[\n                    cur + i],\n                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[2])\n            for i in range(depths[2])])\n        cur += depths[2]\n        self.blocks_d = nn.CellList([\n            Block(\n                dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias,\n                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[\n                    cur + i],\n                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[3])\n            for i in range(depths[3])])\n\n        # Classifier head\n        self._fc = nn.Conv2d(\n            embed_dims[-1], fc_dim, kernel_size=1, has_bias=True)\n        self._bn = nn.BatchNorm2d(fc_dim)\n        self._drop = Dropout(p=drop_rate)\n        self.head = nn.Dense(\n            fc_dim, num_classes) if num_classes > 0 else ops.Identity()\n\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape,\n                                                      cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n\n            elif isinstance(cell, (nn.LayerNorm, nn.BatchNorm2d)):\n                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))\n\n    def forward_features(self, x):\n        B = x.shape[0]\n        x = self.stem_conv1(x)\n        x = self.stem_relu1(x)\n        x = self.stem_norm1(x)\n\n        x = self.stem_conv2(x)\n        x = self.stem_relu2(x)\n        x = self.stem_norm2(x)\n\n        x = self.stem_conv3(x)\n        x = self.stem_relu3(x)\n        x = self.stem_norm3(x)\n\n        x, (H, W) = self.patch_embed_a(x)\n        for _, blk in enumerate(self.blocks_a):\n            x = blk(x, H, W, self.relative_pos_a)\n\n        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))\n        x, (H, W) = self.patch_embed_b(x)\n        for _, blk in enumerate(self.blocks_b):\n            x = blk(x, H, W, self.relative_pos_b)\n\n        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))\n        x, (H, W) = self.patch_embed_c(x)\n        for _, blk in enumerate(self.blocks_c):\n            x = blk(x, H, W, self.relative_pos_c)\n\n    
    x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))\n        x, (H, W) = self.patch_embed_d(x)\n        for _, blk in enumerate(self.blocks_d):\n            x = blk(x, H, W, self.relative_pos_d)\n\n        B, _, C = x.shape\n\n        x = self._fc(ops.transpose(x, (0, 2, 1)).reshape(B, C, H, W))\n        x = self._bn(x)\n        x = swish(x)\n        x = GlobalAvgPooling()(x)\n        x = self._drop(x)\n        return x\n\n    def forward_head(self, x):\n        x = self.head(x)\n        return x\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.cmt.cmt_base","title":"mindcv.models.cmt.cmt_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

CMT-Base

Source code in mindcv/models/cmt.py
@register_model\ndef cmt_base(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"\n    CMT-Base\n    \"\"\"\n    default_cfg = default_cfgs[\"cmt_base\"]\n\n    model = CMT(img_size=256, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,\n                embed_dims=[76, 152, 304, 608], stem_channel=38, num_heads=[1, 2, 4, 8], depths=[4, 4, 20, 4],\n                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.cmt.cmt_small","title":"mindcv.models.cmt.cmt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

CMT-Small

Source code in mindcv/models/cmt.py
@register_model\ndef cmt_small(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"\n    CMT-Small\n    \"\"\"\n    default_cfg = default_cfgs[\"cmt_small\"]\n\n    model = CMT(img_size=224, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,\n                embed_dims=[64, 128, 256, 512], stem_channel=32, num_heads=[1, 2, 4, 8], depths=[3, 3, 16, 3],\n                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
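Usage sketch (assumption: mindcv installed). cmt_small is constructed for 224x224 inputs, since the relative-position tables are sized from img_size at build time.

import numpy as np
import mindspore as ms
from mindcv.models import cmt_small

net = cmt_small(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)
print(net(x).shape)   # expected: (1, 1000)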
"},{"location":"reference/models/#mindcv.models.cmt.cmt_tiny","title":"mindcv.models.cmt.cmt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

CMT-Tiny

Source code in mindcv/models/cmt.py
@register_model\ndef cmt_tiny(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"\n    CMT-tiny\n    \"\"\"\n    default_cfg = default_cfgs[\"cmt_tiny\"]\n\n    model = CMT(img_size=160, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,\n                embed_dims=[46, 92, 184, 368], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[2, 2, 10, 2],\n                mlp_ratios=[3.6, 3.6, 3.6, 3.6], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.cmt.cmt_xsmall","title":"mindcv.models.cmt.cmt_xsmall(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

CMT-XSmall

Source code in mindcv/models/cmt.py
@register_model\ndef cmt_xsmall(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"\n    CMT-XSmall\n    \"\"\"\n    default_cfg = default_cfgs[\"cmt_xsmall\"]\n\n    model = CMT(img_size=192, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,\n                embed_dims=[52, 104, 208, 416], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[3, 3, 12, 3],\n                mlp_ratios=[3.8, 3.8, 3.8, 3.8], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#coat","title":"coat","text":""},{"location":"reference/models/#mindcv.models.coat.coat_lite_medium","title":"mindcv.models.coat.coat_lite_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_lite_medium(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_lite_medium']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[128, 256, 320, 512],\n                 serial_depths=[3, 6, 10, 8], parallel_depth=0,\n                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.coat.coat_lite_mini","title":"mindcv.models.coat.coat_lite_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_lite_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_lite_mini']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[64, 128, 320, 512],\n                 serial_depths=[2, 2, 2, 2], parallel_depth=0,\n                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.coat.coat_lite_small","title":"mindcv.models.coat.coat_lite_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_lite_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_lite_small']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[64, 128, 320, 512],\n                 serial_depths=[3, 4, 6, 3], parallel_depth=0,\n                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.coat.coat_lite_tiny","title":"mindcv.models.coat.coat_lite_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_lite_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_lite_tiny']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[64, 128, 256, 320],\n                 serial_depths=[2, 2, 2, 2], parallel_depth=0,\n                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
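A sketch for orientation (assumes the CoaT factories are importable from mindcv.models): the lite variants use parallel_depth=0, i.e. serial blocks only, while coat_tiny, coat_mini and coat_small add a parallel branch with parallel_depth=6.

import numpy as np
import mindspore as ms
from mindcv.models import coat_lite_tiny

net = coat_lite_tiny(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)
print(net(x).shape)   # expected: (1, 1000)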
"},{"location":"reference/models/#mindcv.models.coat.coat_mini","title":"mindcv.models.coat.coat_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_mini']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[152, 216, 216, 216],\n                 serial_depths=[2, 2, 2, 2], parallel_depth=6,\n                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.coat.coat_small","title":"mindcv.models.coat.coat_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_small']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[152, 320, 320, 320],\n                 serial_depths=[2, 2, 2, 2], parallel_depth=6,\n                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.coat.coat_tiny","title":"mindcv.models.coat.coat_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/coat.py
@register_model\ndef coat_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs['coat_tiny']\n    model = CoaT(in_channels=in_channels, num_classes=num_classes,\n                 patch_size=4, embed_dims=[152, 152, 152, 152],\n                 serial_depths=[2, 2, 2, 2], parallel_depth=6,\n                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#convit","title":"convit","text":""},{"location":"reference/models/#mindcv.models.convit.ConViT","title":"mindcv.models.convit.ConViT","text":"

Bases: nn.Cell

ConViT model class, based on "Improving Vision Transformers with Soft Convolutional Inductive Biases" <https://arxiv.org/pdf/2103.10697.pdf>_

PARAMETER DESCRIPTION in_channels

number of input channels. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

image_size

images input size. Default: 224.

TYPE: int DEFAULT: 224

patch_size

image patch size. Default: 16.

TYPE: int DEFAULT: 16

embed_dim

total embedding dimension across all heads. Default: 48.

TYPE: int DEFAULT: 48

num_heads

number of heads. Default: 12.

TYPE: int DEFAULT: 12

drop_rate

dropout rate. Default: 0.

TYPE: float DEFAULT: 0.0

drop_path_rate

drop path rate. Default: 0.1.

TYPE: float DEFAULT: 0.1

depth

model block depth. Default: 12.

TYPE: int DEFAULT: 12

mlp_ratio

ratio of hidden features in Mlp. Default: 4.

TYPE: float DEFAULT: 4.0

qkv_bias

whether to add a bias term in the qkv layers. Default: False.

TYPE: bool DEFAULT: False

attn_drop_rate

attention layers dropout rate. Default: 0.

TYPE: float DEFAULT: 0.0

locality_strength

determines how focused each head is around its attention center. Default: 1.

TYPE: float DEFAULT: 1.0

local_up_to_layer

number of GPSA layers. Default: 10.

TYPE: int DEFAULT: 10

use_pos_embed

whether to use position embedding. Default: True.

TYPE: bool DEFAULT: True


Source code in mindcv/models/convit.py
class ConViT(nn.Cell):\nr\"\"\"ConViT model class, based on\n    '\"Improving Vision Transformers with Soft Convolutional Inductive Biases\"\n    <https://arxiv.org/pdf/2103.10697.pdf>'\n\n    Args:\n        in_channels (int): number the channels of the input. Default: 3.\n        num_classes (int) : number of classification classes. Default: 1000.\n        image_size (int) : images input size. Default: 224.\n        patch_size (int) : image patch size. Default: 16.\n        embed_dim (int) : embedding dimension in all head. Default: 48.\n        num_heads (int) : number of heads. Default: 12.\n        drop_rate (float) : dropout rate. Default: 0.\n        drop_path_rate (float) : drop path rate. Default: 0.1.\n        depth (int) : model block depth. Default: 12.\n        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.\n        qkv_bias (bool) : have bias in qkv layers or not. Default: False.\n        attn_drop_rate (float) : attention layers dropout rate. Default: 0.\n        locality_strength (float) : determines how focused each head is around its attention center. Default: 1.\n        local_up_to_layer (int) : number of GPSA layers. Default: 10.\n        use_pos_embed (bool): whether use the embeded position.  Default: True.\n        locality_strength\uff08float\uff09: the strength of locality. Default: 1.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n        image_size: int = 224,\n        patch_size: int = 16,\n        embed_dim: int = 48,\n        num_heads: int = 12,\n        drop_rate: float = 0.0,\n        drop_path_rate: float = 0.1,\n        depth: int = 12,\n        mlp_ratio: float = 4.0,\n        qkv_bias: bool = False,\n        attn_drop_rate: float = 0.0,\n        local_up_to_layer: int = 10,\n        use_pos_embed: bool = True,\n        locality_strength: float = 1.0,\n    ) -> None:\n        super().__init__()\n\n        self.local_up_to_layer = local_up_to_layer\n        self.use_pos_embed = use_pos_embed\n        self.num_heads = num_heads\n        self.locality_strength = locality_strength\n        self.embed_dim = embed_dim\n\n        self.patch_embed = PatchEmbed(\n            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim)\n        self.num_patches = self.patch_embed.num_patches\n\n        self.cls_token = Parameter(ops.Zeros()((1, 1, embed_dim), ms.float32))\n        self.pos_drop = Dropout(p=drop_rate)\n\n        if self.use_pos_embed:\n            self.pos_embed = Parameter(ops.Zeros()((1, self.num_patches, embed_dim), ms.float32))\n            self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.pos_embed.data.shape))\n\n        dpr = [x.item() for x in np.linspace(0, drop_path_rate, depth)]\n        self.blocks = nn.CellList([\n            Block(\n                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,\n                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                use_gpsa=True)\n            if i < local_up_to_layer else\n            Block(\n                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,\n                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                use_gpsa=False)\n            for i in range(depth)])\n        self.norm = nn.LayerNorm((embed_dim,))\n\n        self.classifier = nn.Dense(in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else 
Identity()\n        self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.cls_token.data.shape))\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.data.shape))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Constant(0), cell.bias.shape))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.gamma.set_data(init.initializer(init.Constant(1), cell.gamma.shape))\n                cell.beta.set_data(init.initializer(init.Constant(0), cell.beta.shape))\n        # local init\n        for i in range(self.local_up_to_layer):\n            self.blocks[i].attn.v.weight.set_data(ops.eye(self.embed_dim, self.embed_dim, ms.float32), slice_shape=True)\n            locality_distance = 1\n            kernel_size = int(self.num_heads**0.5)\n            center = (kernel_size - 1) / 2 if kernel_size % 2 == 0 else kernel_size // 2\n            pos_weight_data = self.blocks[i].attn.pos_proj.weight.data\n            for h1 in range(kernel_size):\n                for h2 in range(kernel_size):\n                    position = h1 + kernel_size * h2\n                    pos_weight_data[position, 2] = -1\n                    pos_weight_data[position, 1] = 2 * (h1 - center) * locality_distance\n                    pos_weight_data[position, 0] = 2 * (h2 - center) * locality_distance\n            pos_weight_data = pos_weight_data * self.locality_strength\n            self.blocks[i].attn.pos_proj.weight.set_data(pos_weight_data)\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.patch_embed(x)\n        if self.use_pos_embed:\n            x = x + self.pos_embed\n        x = self.pos_drop(x)\n        cls_tokens = ops.tile(self.cls_token, (x.shape[0], 1, 1))\n        for u, blk in enumerate(self.blocks):\n            if u == self.local_up_to_layer:\n                x = ops.Cast()(x, cls_tokens.dtype)\n                x = ops.concat((cls_tokens, x), 1)\n            x = blk(x)\n        x = self.norm(x)\n        return x[:, 0]\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
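A minimal construction sketch using the documented arguments; the values mirror the convit_tiny factory further below, and the import path is assumed from the "Source code in mindcv/models/convit.py" note above.

import numpy as np
import mindspore as ms
from mindcv.models.convit import ConViT

net = ConViT(in_channels=3, num_classes=1000, image_size=224, patch_size=16,
             embed_dim=192, num_heads=4, local_up_to_layer=10, use_pos_embed=True)
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)
print(net(x).shape)   # expected: (1, 1000); the first 10 blocks use GPSA, the remaining ones plain SA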
"},{"location":"reference/models/#mindcv.models.convit.convit_base","title":"mindcv.models.convit.convit_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT base model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT base model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_base\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=16, embed_dim=768, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.convit.convit_base_plus","title":"mindcv.models.convit.convit_base_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT base+ model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_base_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT base+ model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_base_plus\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=16, embed_dim=1024, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.convit.convit_small","title":"mindcv.models.convit.convit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT small model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT small model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_small\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=9, embed_dim=432, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.convit.convit_small_plus","title":"mindcv.models.convit.convit_small_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT small+ model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_small_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT small+ model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_small_plus\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=9, embed_dim=576, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.convit.convit_tiny","title":"mindcv.models.convit.convit_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT tiny model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT tiny model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_tiny\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=4, embed_dim=192, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.convit.convit_tiny_plus","title":"mindcv.models.convit.convit_tiny_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConViT tiny+ model. Refer to the base class models.ConViT for more details.

Source code in mindcv/models/convit.py
@register_model\ndef convit_tiny_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:\n\"\"\"Get ConViT tiny+ model\n    Refer to the base class \"models.ConViT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convit_tiny_plus\"]\n    model = ConViT(in_channels=in_channels, num_classes=num_classes,\n                   num_heads=4, embed_dim=256, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#convnext","title":"convnext","text":""},{"location":"reference/models/#mindcv.models.convnext.ConvNeXt","title":"mindcv.models.convnext.ConvNeXt","text":"

Bases: nn.Cell

ConvNeXt and ConvNeXt V2 model class, based on \"A ConvNet for the 2020s\" <https://arxiv.org/abs/2201.03545>_ and \"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders\" <https://arxiv.org/abs/2301.00808>_

PARAMETER DESCRIPTION in_channels

number of input channels.

TYPE: int

num_classes

number of classes to predict.

TYPE: int

depths

number of blocks in each stage.

TYPE: List[int]

dims

feature dimension (number of channels) of each stage.

TYPE: List[int]

drop_path_rate

drop path (stochastic depth) rate. Default: 0.0.

TYPE: float DEFAULT: 0.0

layer_scale_init_value

initial value of the layer scale parameter in each block. Default: 1e-6.

TYPE: float DEFAULT: 1e-06

head_init_scale

scale factor applied to the classifier weights and bias at initialization. Default: 1.0.

TYPE: float DEFAULT: 1.0

use_grn

If True, use Global Response Normalization in each block. Default: False.

TYPE: bool DEFAULT: False

Source code in mindcv/models/convnext.py
class ConvNeXt(nn.Cell):\nr\"\"\"ConvNeXt and ConvNeXt V2 model class, based on\n    `\"A ConvNet for the 2020s\" <https://arxiv.org/abs/2201.03545>`_ and\n    `\"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders\" <https://arxiv.org/abs/2301.00808>`_\n\n    Args:\n        in_channels: dim of the input channel.\n        num_classes: dim of the classes predicted.\n        depths: the depths of each layer.\n        dims: the middle dim of each layer.\n        drop_path_rate: the rate of droppath. Default: 0.0.\n        layer_scale_init_value: the parameter of init for the classifier. Default: 1e-6.\n        head_init_scale: the parameter of init for the head. Default: 1.0.\n        use_grn: If True, use Global Response Normalization in each block. Default: False.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        num_classes: int,\n        depths: List[int],\n        dims: List[int],\n        drop_path_rate: float = 0.0,\n        layer_scale_init_value: float = 1e-6,\n        head_init_scale: float = 1.0,\n        use_grn: bool = False,\n    ):\n        super().__init__()\n\n        downsample_layers = []  # stem and 3 intermediate down_sampling conv layers\n        stem = nn.SequentialCell(\n            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4, has_bias=True),\n            ConvNextLayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),\n        )\n        downsample_layers.append(stem)\n        for i in range(3):\n            downsample_layer = nn.SequentialCell(\n                ConvNextLayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),\n                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),\n            )\n            downsample_layers.append(downsample_layer)\n\n        total_reduction = 4\n        self.feature_info = []\n        self.flatten_sequential = True\n\n        stages = []  # 4 feature resolution stages, each consisting of multiple residual blocks\n        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))\n        cur = 0\n        for i in range(4):\n            blocks = []\n            for j in range(depths[i]):\n                blocks.append(Block(dim=dims[i], drop_path=dp_rates[cur + j],\n                                    layer_scale_init_value=layer_scale_init_value, use_grn=use_grn))\n            stage = nn.SequentialCell(blocks)\n            stages.append(stage)\n            cur += depths[i]\n\n            if i > 0:\n                total_reduction *= 2\n            self.feature_info.append(dict(chs=dims[i], reduction=total_reduction, name=f'feature.{i * 2 + 1}'))\n\n        self.feature = nn.SequentialCell([\n            downsample_layers[0],\n            stages[0],\n            downsample_layers[1],\n            stages[1],\n            downsample_layers[2],\n            stages[2],\n            downsample_layers[3],\n            stages[3]\n        ])\n        self.norm = ConvNextLayerNorm((dims[-1],), epsilon=1e-6)  # final norm layer\n        self.classifier = nn.Dense(dims[-1], num_classes)  # classifier\n        self.head_init_scale = head_init_scale\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, (nn.Dense, nn.Conv2d)):\n                cell.weight.set_data(\n                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)\n                )\n                if 
isinstance(cell, nn.Dense) and cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))\n        self.classifier.weight.set_data(self.classifier.weight * self.head_init_scale)\n        self.classifier.bias.set_data(self.classifier.bias * self.head_init_scale)\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.classifier(x)\n        return x\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.feature(x)\n        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
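Construction sketch with the arguments documented above; depths and dims are taken from the convnext_tiny factory below, and the import path is assumed from the "Source code in mindcv/models/convnext.py" note.

import numpy as np
import mindspore as ms
from mindcv.models.convnext import ConvNeXt

net = ConvNeXt(in_channels=3, num_classes=1000,
               depths=[3, 3, 9, 3], dims=[96, 192, 384, 768])
x = ms.Tensor(np.ones((1, 3, 224, 224)), ms.float32)
print(net(x).shape)          # expected: (1, 1000)
print(net.feature_info[-1])  # per-stage channels/reduction recorded for feature extraction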
"},{"location":"reference/models/#mindcv.models.convnext.convnext_base","title":"mindcv.models.convnext.convnext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt base model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnext_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt base model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnext_base\"]\n    model_args = dict(\n        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs\n    )\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnext_large","title":"mindcv.models.convnext.convnext_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt large model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnext_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt large model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnext_large\"]\n    model_args = dict(\n        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs\n    )\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnext_small","title":"mindcv.models.convnext.convnext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt small model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnext_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt small model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnext_small\"]\n    model_args = dict(\n        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs\n    )\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnext_tiny","title":"mindcv.models.convnext.convnext_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt tiny model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnext_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt tiny model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnext_tiny\"]\n    model_args = dict(\n        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs\n    )\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
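The same model can also be created through the registry, since the factory is decorated with @register_model; this sketch assumes mindcv.create_model resolves registered names.

import mindcv

net = mindcv.create_model("convnext_tiny", pretrained=False, num_classes=100)
print(type(net).__name__)   # ConvNeXt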
"},{"location":"reference/models/#mindcv.models.convnext.convnext_xlarge","title":"mindcv.models.convnext.convnext_xlarge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt xlarge model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnext_xlarge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt xlarge model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnext_xlarge\"]\n    model_args = dict(\n        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs\n    )\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_atto","title":"mindcv.models.convnext.convnextv2_atto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 atto model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_atto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 atto model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_atto\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],\n                      dims=[40, 80, 160, 320], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_base","title":"mindcv.models.convnext.convnextv2_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 base model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 base model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_base\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],\n                      dims=[128, 256, 512, 1024], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_femto","title":"mindcv.models.convnext.convnextv2_femto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 femto model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_femto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 femto model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_femto\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],\n                      dims=[48, 96, 192, 384], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_huge","title":"mindcv.models.convnext.convnextv2_huge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 huge model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_huge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 huge model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_huge\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],\n                      dims=[352, 704, 1408, 2816], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_large","title":"mindcv.models.convnext.convnextv2_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 large model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 large model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_large\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],\n                      dims=[192, 384, 768, 1536], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_nano","title":"mindcv.models.convnext.convnextv2_nano(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ConvNeXt_v2 nano model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_nano(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 nano model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_nano\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 8, 2],\n                      dims=[80, 160, 320, 640], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_pico","title":"mindcv.models.convnext.convnextv2_pico(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the ConvNeXt_v2 pico model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_pico(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 pico model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_pico\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],\n                      dims=[64, 128, 256, 512], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.convnext.convnextv2_tiny","title":"mindcv.models.convnext.convnextv2_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the ConvNeXt_v2 tiny model. Refer to the base class 'models.ConvNeXt' for more details.

Source code in mindcv/models/convnext.py
@register_model\ndef convnextv2_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:\n\"\"\"Get ConvNeXt_v2 tiny model.\n    Refer to the base class 'models.ConvNeXt' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"convnextv2_tiny\"]\n    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3],\n                      dims=[96, 192, 384, 768], use_grn=True, layer_scale_init_value=0.0, **kwargs)\n    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
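All of the ConvNeXt_v2 builders above follow the same pattern: look up a default_cfg, assemble model_args (depths, dims, use_grn=True, layer_scale_init_value=0.0) and delegate to _create_convnext. A minimal usage sketch follows; it assumes MindSpore is installed and imports the builder from the module path shown in the source locations above, and the 224x224 input size is an illustrative assumption rather than a documented requirement.

import numpy as np
import mindspore as ms
from mindcv.models.convnext import convnextv2_tiny

# Build the tiny variant with a 10-class head; pretrained=False avoids
# downloading a checkpoint.
model = convnextv2_tiny(pretrained=False, num_classes=10, in_channels=3)

# Dummy batch of one 3x224x224 image.
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)      # nn.Cell.__call__ dispatches to construct()
print(logits.shape)    # expected: (1, 10)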
"},{"location":"reference/models/#crossvit","title":"crossvit","text":""},{"location":"reference/models/#mindcv.models.crossvit.crossvit_15","title":"mindcv.models.crossvit.crossvit_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/crossvit.py
@register_model\ndef crossvit_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:\n    model = VisionTransformer(img_size=[240, 224],\n                              patch_size=[12, 16], embed_dim=[192, 384], depth=[[1, 5, 0], [1, 5, 0], [1, 5, 0]],\n                              num_heads=[6, 6], mlp_ratio=[3, 3, 1], qkv_bias=True,\n                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"crossvit_15\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.crossvit.crossvit_18","title":"mindcv.models.crossvit.crossvit_18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/crossvit.py
@register_model\ndef crossvit_18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:\n    model = VisionTransformer(img_size=[240, 224],\n                              patch_size=[12, 16], embed_dim=[224, 448], depth=[[1, 6, 0], [1, 6, 0], [1, 6, 0]],\n                              num_heads=[7, 7], mlp_ratio=[3, 3, 1], qkv_bias=True,\n                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"crossvit_18\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.crossvit.crossvit_9","title":"mindcv.models.crossvit.crossvit_9(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/crossvit.py
@register_model\ndef crossvit_9(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    model = VisionTransformer(img_size=[240, 224],\n                              patch_size=[12, 16], embed_dim=[128, 256], depth=[[1, 3, 0], [1, 3, 0], [1, 3, 0]],\n                              num_heads=[4, 4], mlp_ratio=[3, 3, 1], qkv_bias=True,\n                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"crossvit_9\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
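The crossvit_9/15/18 builders each assemble a dual-branch VisionTransformer: two patch sizes (12 and 16) over two image scales (img_size=[240, 224]), with embedding widths and depths growing across the three variants. A hedged sketch of constructing the smallest variant directly; the parameter-count line assumes the usual MindSpore nn.Cell.get_parameters() API.

from mindcv.models.crossvit import crossvit_9

# Smallest CrossViT variant, randomly initialised.
model = crossvit_9(pretrained=False, num_classes=1000)

# Rough sanity check: total number of trainable elements.
num_params = sum(p.size for p in model.get_parameters())
print(f"crossvit_9 parameters: {num_params / 1e6:.1f} M")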
"},{"location":"reference/models/#densenet","title":"densenet","text":""},{"location":"reference/models/#mindcv.models.densenet.DenseNet","title":"mindcv.models.densenet.DenseNet","text":"

Bases: nn.Cell

Densenet-BC model class, based on "Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>.

PARAMETER DESCRIPTION growth_rate

how many filters to add each layer (k in paper). Default: 32.

TYPE: int DEFAULT: 32

block_config

how many layers in each pooling block. Default: (6, 12, 24, 16).

TYPE: Tuple[int, int, int, int] DEFAULT: (6, 12, 24, 16)

num_init_features

number of filters in the first Conv2d. Default: 64.

TYPE: int DEFAULT: 64

bn_size

multiplicative factor for number of bottleneck layers (i.e. bn_size * k features in the bottleneck layer). Default: 4.

TYPE: int DEFAULT: 4

drop_rate

dropout rate after each dense layer. Default: 0.

TYPE: float DEFAULT: 0.0

in_channels

number of input channels. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/densenet.py
class DenseNet(nn.Cell):\nr\"\"\"Densenet-BC model class, based on\n    `\"Densely Connected Convolutional Networks\" <https://arxiv.org/pdf/1608.06993.pdf>`_\n\n    Args:\n        growth_rate: how many filters to add each layer (`k` in paper). Default: 32.\n        block_config: how many layers in each pooling block. Default: (6, 12, 24, 16).\n        num_init_features: number of filters in the first Conv2d. Default: 64.\n        bn_size (int): multiplicative factor for number of bottleneck layers\n          (i.e. bn_size * k features in the bottleneck layer). Default: 4.\n        drop_rate: dropout rate after each dense layer. Default: 0.\n        in_channels: number of input channels. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        growth_rate: int = 32,\n        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),\n        num_init_features: int = 64,\n        bn_size: int = 4,\n        drop_rate: float = 0.0,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n    ) -> None:\n        super().__init__()\n        layers = OrderedDict()\n        # first Conv2d\n        num_features = num_init_features\n        layers[\"conv0\"] = nn.Conv2d(in_channels, num_features, kernel_size=7, stride=2, pad_mode=\"pad\", padding=3)\n        layers[\"norm0\"] = nn.BatchNorm2d(num_features)\n        layers[\"relu0\"] = nn.ReLU()\n        layers[\"pool0\"] = nn.SequentialCell([\n            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode=\"CONSTANT\"),\n            nn.MaxPool2d(kernel_size=3, stride=2),\n        ])\n\n        # DenseBlock\n        for i, num_layers in enumerate(block_config):\n            block = _DenseBlock(\n                num_layers=num_layers,\n                num_input_features=num_features,\n                bn_size=bn_size,\n                growth_rate=growth_rate,\n                drop_rate=drop_rate,\n            )\n            layers[f\"denseblock{i + 1}\"] = block\n            num_features += num_layers * growth_rate\n            if i != len(block_config) - 1:\n                transition = _Transition(num_features, num_features // 2)\n                layers[f\"transition{i + 1}\"] = transition\n                num_features = num_features // 2\n\n        # final bn+ReLU\n        layers[\"norm5\"] = nn.BatchNorm2d(num_features)\n        layers[\"relu5\"] = nn.ReLU()\n\n        self.num_features = num_features\n        self.features = nn.SequentialCell(layers)\n        self.pool = GlobalAvgPooling()\n        self.classifier = nn.Dense(self.num_features, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(math.sqrt(5), mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                         cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                
cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
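The channel bookkeeping in DenseNet.__init__ above is easy to verify by hand: each dense block adds num_layers * growth_rate channels and every transition (all stages except the last) halves the count. The following standalone sketch reproduces that arithmetic for the densenet121 and densenet161 configurations; it is illustrative only and does not build any layers.

def densenet_num_features(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64):
    """Mirror the channel arithmetic of DenseNet.__init__ without building layers."""
    num_features = num_init_features
    for i, num_layers in enumerate(block_config):
        num_features += num_layers * growth_rate   # dense block growth
        if i != len(block_config) - 1:
            num_features //= 2                      # transition halves the channels
    return num_features

print(densenet_num_features())                          # 1024, the classifier input width of densenet121
print(densenet_num_features(48, (6, 12, 36, 24), 96))   # 2208 for the densenet161 configuration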
"},{"location":"reference/models/#mindcv.models.densenet.densenet121","title":"mindcv.models.densenet.densenet121(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 121-layer DenseNet model. Refer to the base class models.DenseNet for more details.

Source code in mindcv/models/densenet.py
@register_model\ndef densenet121(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:\n\"\"\"Get 121 layers DenseNet model.\n     Refer to the base class `models.DenseNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"densenet121\"]\n    model = DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, in_channels=in_channels,\n                     num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.densenet.densenet161","title":"mindcv.models.densenet.densenet161(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 161-layer DenseNet model. Refer to the base class models.DenseNet for more details.

Source code in mindcv/models/densenet.py
@register_model\ndef densenet161(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:\n\"\"\"Get 161 layers DenseNet model.\n     Refer to the base class `models.DenseNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"densenet161\"]\n    model = DenseNet(growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, in_channels=in_channels,\n                     num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.densenet.densenet169","title":"mindcv.models.densenet.densenet169(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 169-layer DenseNet model. Refer to the base class models.DenseNet for more details.

Source code in mindcv/models/densenet.py
@register_model\ndef densenet169(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:\n\"\"\"Get 169 layers DenseNet model.\n     Refer to the base class `models.DenseNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"densenet169\"]\n    model = DenseNet(growth_rate=32, block_config=(6, 12, 32, 32), num_init_features=64, in_channels=in_channels,\n                     num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.densenet.densenet201","title":"mindcv.models.densenet.densenet201(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 201-layer DenseNet model. Refer to the base class models.DenseNet for more details.

Source code in mindcv/models/densenet.py
@register_model\ndef densenet201(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:\n\"\"\"Get 201 layers DenseNet model.\n     Refer to the base class `models.DenseNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"densenet201\"]\n    model = DenseNet(growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, in_channels=in_channels,\n                     num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
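Every builder on this page also exposes in_channels, so non-RGB inputs only require changing that argument; the stem convolution is then created with the requested channel count. A hedged sketch for single-channel (grayscale) input, assuming MindSpore is installed; the 224x224 resolution is an illustrative choice.

import numpy as np
import mindspore as ms
from mindcv.models.densenet import densenet121

# Grayscale input: only the first convolution changes shape.
net = densenet121(pretrained=False, num_classes=10, in_channels=1)
x = ms.Tensor(np.random.randn(4, 1, 224, 224), ms.float32)
print(net(x).shape)   # expected: (4, 10)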
"},{"location":"reference/models/#dpn","title":"dpn","text":""},{"location":"reference/models/#mindcv.models.dpn.DPN","title":"mindcv.models.dpn.DPN","text":"

Bases: nn.Cell

DPN model class, based on "Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>.

PARAMETER DESCRIPTION num_init_channel

int type, the number of output channels of the stem block. Default: 64.

TYPE: int DEFAULT: 64

k_r

int type, the base bottleneck width used to derive each stage's residual-path channels. Default: 96.

TYPE: int DEFAULT: 96

g

int type, number of groups in the conv2d. Default: 32.

TYPE: int DEFAULT: 32

k_sec

the number of blocks in each of the four stages. Default: (3, 4, 20, 3).

TYPE: Tuple[int] DEFAULT: (3, 4, 20, 3)

inc_sec

the dense-path channel increment of each stage. Default: (16, 32, 24, 128).

TYPE: Tuple[int] DEFAULT: (16, 32, 24, 128)

in_channels

int type, number of input channels. Default: 3.

TYPE: int DEFAULT: 3

num_classes

int type, number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/dpn.py
class DPN(nn.Cell):\nr\"\"\"DPN model class, based on\n    `\"Dual Path Networks\" <https://arxiv.org/pdf/1707.01629.pdf>`_\n\n    Args:\n        num_init_channel: int type, the output channel of first blocks. Default: 64.\n        k_r: int type, the first channel of each stage. Default: 96.\n        g: int type,number of group in the conv2d. Default: 32.\n        k_sec Tuple[int]: multiplicative factor for number of bottleneck layers. Default: 4.\n        inc_sec Tuple[int]: the first output channel in each stage. Default: (16, 32, 24, 128).\n        in_channels: int type, number of input channels. Default: 3.\n        num_classes: int type, number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_init_channel: int = 64,\n        k_r: int = 96,\n        g: int = 32,\n        k_sec: Tuple[int, int, int, int] = (3, 4, 20, 3),\n        inc_sec: Tuple[int, int, int, int] = (16, 32, 24, 128),\n        in_channels: int = 3,\n        num_classes: int = 1000,\n    ):\n        super().__init__()\n        blocks = OrderedDict()\n\n        # conv1\n        blocks[\"conv1\"] = nn.SequentialCell(OrderedDict([\n            (\"conv\", nn.Conv2d(in_channels, num_init_channel, kernel_size=7, stride=2, pad_mode=\"pad\", padding=3)),\n            (\"norm\", nn.BatchNorm2d(num_init_channel, eps=1e-3, momentum=0.9)),\n            (\"relu\", nn.ReLU()),\n            (\"maxpool\", nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")),\n        ]))\n\n        # conv2\n        bw = 256\n        inc = inc_sec[0]\n        r = int((k_r * bw) / 256)\n        blocks[\"conv2_1\"] = DualPathBlock(num_init_channel, r, r, bw, inc, g, \"proj\", False)\n        in_channel = bw + 3 * inc\n        for i in range(2, k_sec[0] + 1):\n            blocks[f\"conv2_{i}\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"normal\")\n            in_channel += inc\n\n        # conv3\n        bw = 512\n        inc = inc_sec[1]\n        r = int((k_r * bw) / 256)\n        blocks[\"conv3_1\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"down\")\n        in_channel = bw + 3 * inc\n        for i in range(2, k_sec[1] + 1):\n            blocks[f\"conv3_{i}\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"normal\")\n            in_channel += inc\n\n        # conv4\n        bw = 1024\n        inc = inc_sec[2]\n        r = int((k_r * bw) / 256)\n        blocks[\"conv4_1\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"down\")\n        in_channel = bw + 3 * inc\n        for i in range(2, k_sec[2] + 1):\n            blocks[f\"conv4_{i}\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"normal\")\n            in_channel += inc\n\n        # conv5\n        bw = 2048\n        inc = inc_sec[3]\n        r = int((k_r * bw) / 256)\n        blocks[\"conv5_1\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"down\")\n        in_channel = bw + 3 * inc\n        for i in range(2, k_sec[3] + 1):\n            blocks[f\"conv5_{i}\"] = DualPathBlock(in_channel, r, r, bw, inc, g, \"normal\")\n            in_channel += inc\n\n        self.features = nn.SequentialCell(blocks)\n        self.conv5_x = nn.SequentialCell(OrderedDict([\n            (\"norm\", nn.BatchNorm2d(in_channel, eps=1e-3, momentum=0.9)),\n            (\"relu\", nn.ReLU()),\n        ]))\n        self.avgpool = GlobalAvgPooling()\n        self.classifier = nn.Dense(in_channel, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in 
self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(math.sqrt(5), mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                         cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_feature(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = ops.concat(x, axis=1)\n        x = self.conv5_x(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.avgpool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_feature(x)\n        x = self.forward_head(x)\n        return x\n
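The per-stage channel growth in DPN.__init__ follows one rule four times: a stage opens with bw + 3 * inc channels and every further block appends inc dense-path channels, while the residual-path width is r = k_r * bw / 256. The sketch below traces those numbers for the dpn92 defaults; it is pure arithmetic and needs no MindSpore.

def dpn_stage_channels(k_r=96, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128)):
    """Trace the in_channel value after each DPN stage, mirroring DPN.__init__."""
    widths = []
    for bw, inc, blocks in zip((256, 512, 1024, 2048), inc_sec, k_sec):
        r = int(k_r * bw / 256)        # residual-path width of this stage
        out = bw + 3 * inc             # after the first (proj/down) block
        out += (blocks - 1) * inc      # each remaining block adds inc
        widths.append((r, out))
    return widths

for r, c in dpn_stage_channels():
    print(f"residual width {r:4d} -> stage output channels {c}")
# The final value (2688 for dpn92) is the classifier input width.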
"},{"location":"reference/models/#mindcv.models.dpn.dpn107","title":"mindcv.models.dpn.dpn107(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 107-layer DPN model. Refer to the base class models.DPN for more details.

Source code in mindcv/models/dpn.py
@register_model\ndef dpn107(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:\n\"\"\"Get 107 layers DPN model.\n     Refer to the base class `models.DPN` for more details.\"\"\"\n    default_cfg = default_cfgs[\"dpn107\"]\n    model = DPN(num_init_channel=128, k_r=200, g=50, k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128),\n                num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.dpn.dpn131","title":"mindcv.models.dpn.dpn131(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 131-layer DPN model. Refer to the base class models.DPN for more details.

Source code in mindcv/models/dpn.py
@register_model\ndef dpn131(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:\n\"\"\"Get 131 layers DPN model.\n     Refer to the base class `models.DPN` for more details.\"\"\"\n    default_cfg = default_cfgs[\"dpn131\"]\n    model = DPN(num_init_channel=128, k_r=160, g=40, k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128),\n                num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.dpn.dpn92","title":"mindcv.models.dpn.dpn92(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 92-layer DPN model. Refer to the base class models.DPN for more details.

Source code in mindcv/models/dpn.py
@register_model\ndef dpn92(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:\n\"\"\"Get 92 layers DPN model.\n     Refer to the base class `models.DPN` for more details.\"\"\"\n    default_cfg = default_cfgs[\"dpn92\"]\n    model = DPN(num_init_channel=64, k_r=96, g=32, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),\n                num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.dpn.dpn98","title":"mindcv.models.dpn.dpn98(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the 98-layer DPN model. Refer to the base class models.DPN for more details.

Source code in mindcv/models/dpn.py
@register_model\ndef dpn98(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:\n\"\"\"Get 98 layers DPN model.\n     Refer to the base class `models.DPN` for more details.\"\"\"\n    default_cfg = default_cfgs[\"dpn98\"]\n    model = DPN(num_init_channel=96, k_r=160, g=40, k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128),\n                num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#edgenext","title":"edgenext","text":""},{"location":"reference/models/#mindcv.models.edgenext.EdgeNeXt","title":"mindcv.models.edgenext.EdgeNeXt","text":"

Bases: nn.Cell

EdgeNeXt model class, based on "Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>.

PARAMETER DESCRIPTION in_channels

number of input channels. Default: 3

num_classes

number of classification classes. Default: 1000

DEFAULT: 1000

depths

the depth (number of blocks) of each stage. Default: [3, 3, 9, 3]

DEFAULT: [3, 3, 9, 3]

dims

the middle dim of each layer. Default: [24, 48, 88, 168]

DEFAULT: [24, 48, 88, 168]

global_block

number of global blocks in each stage. Default: [0, 0, 0, 3]

DEFAULT: [0, 0, 0, 3]

global_block_type

type of global block. Default: ['None', 'None', 'None', 'SDTA']

DEFAULT: ['None', 'None', 'None', 'SDTA']

drop_path_rate

stochastic depth (drop path) rate. Default: 0.

DEFAULT: 0.0

layer_scale_init_value

value of layer scale initialization. Default: 1e-6

DEFAULT: 1e-06

head_init_scale

scale of head initialization. Default: 1.

DEFAULT: 1.0

expan_ratio

ratio of expansion. Default: 4

DEFAULT: 4

kernel_sizes

kernel sizes of different stages. Default: [7, 7, 7, 7]

DEFAULT: [7, 7, 7, 7]

heads

number of attention heads. Default: [8, 8, 8, 8]

DEFAULT: [8, 8, 8, 8]

use_pos_embd_xca

use position embedding in xca or not. Default: [False, False, False, False]

DEFAULT: [False, False, False, False]

use_pos_embd_global

use position embedding globally or not. Default: False

DEFAULT: False

d2_scales

scales for splitting channels in the SDTA blocks. Default: [2, 3, 4, 5]

DEFAULT: [2, 3, 4, 5]

Source code in mindcv/models/edgenext.py
class EdgeNeXt(nn.Cell):\nr\"\"\"EdgeNeXt model class, based on\n    `\"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision\" <https://arxiv.org/abs/2206.10589>`_\n\n    Args:\n        in_channels: number of input channels. Default: 3\n        num_classes: number of classification classes. Default: 1000\n        depths: the depths of each layer. Default: [0, 0, 0, 3]\n        dims: the middle dim of each layer. Default: [24, 48, 88, 168]\n        global_block: number of global block. Default: [0, 0, 0, 3]\n        global_block_type: type of global block. Default: ['None', 'None', 'None', 'SDTA']\n        drop_path_rate: Stochastic Depth. Default: 0.\n        layer_scale_init_value: value of layer scale initialization. Default: 1e-6\n        head_init_scale: scale of head initialization. Default: 1.\n        expan_ratio: ratio of expansion. Default: 4\n        kernel_sizes: kernel sizes of different stages. Default: [7, 7, 7, 7]\n        heads: number of attention heads. Default: [8, 8, 8, 8]\n        use_pos_embd_xca: use position embedding in xca or not. Default: [False, False, False, False]\n        use_pos_embd_global: use position embedding globally or not. Default: False\n        d2_scales: scales of splitting channels\n    \"\"\"\n    def __init__(self, in_chans=3, num_classes=1000,\n                 depths=[3, 3, 9, 3], dims=[24, 48, 88, 168],\n                 global_block=[0, 0, 0, 3], global_block_type=[\"None\", \"None\", \"None\", \"SDTA\"],\n                 drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1., expan_ratio=4,\n                 kernel_sizes=[7, 7, 7, 7], heads=[8, 8, 8, 8], use_pos_embd_xca=[False, False, False, False],\n                 use_pos_embd_global=False, d2_scales=[2, 3, 4, 5], **kwargs):\n        super().__init__()\n        for g in global_block_type:\n            assert g in [\"None\", \"SDTA\"]\n        if use_pos_embd_global:\n            self.pos_embd = PositionalEncodingFourier(dim=dims[0])\n        else:\n            self.pos_embd = None\n        self.downsample_layers = nn.CellList()  # stem and 3 intermediate downsampling conv layers\n        stem = nn.SequentialCell(\n            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4, has_bias=True),\n            LayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),\n        )\n        self.downsample_layers.append(stem)\n        for i in range(3):\n            downsample_layer = nn.SequentialCell(\n                LayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),\n                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),\n            )\n            self.downsample_layers.append(downsample_layer)\n\n        self.stages = nn.CellList()  # 4 feature resolution stages, each consisting of multiple residual blocks\n        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))\n        cur = 0\n        for i in range(4):\n            stage_blocks = []\n            for j in range(depths[i]):\n                if j > depths[i] - global_block[i] - 1:\n                    if global_block_type[i] == \"SDTA\":\n                        stage_blocks.append(SDTAEncoder(dim=dims[i], drop_path=dp_rates[cur + j],\n                                                        expan_ratio=expan_ratio, scales=d2_scales[i],\n                                                        use_pos_emb=use_pos_embd_xca[i], num_heads=heads[i]))\n                    else:\n                        raise NotImplementedError\n                else:\n                    
stage_blocks.append(ConvEncoder(dim=dims[i], drop_path=dp_rates[cur + j],\n                                                    layer_scale_init_value=layer_scale_init_value,\n                                                    expan_ratio=expan_ratio, kernel_size=kernel_sizes[i]))\n\n            self.stages.append(nn.SequentialCell(*stage_blocks))\n            cur += depths[i]\n        self.norm = nn.LayerNorm((dims[-1],), epsilon=1e-6)  # Final norm layer\n        self.head = nn.Dense(dims[-1], num_classes)\n\n        # self.head_dropout = Dropout(kwargs[\"classifier_dropout\"])\n        self.head_dropout = Dropout(p=0.0)\n        self.head_init_scale = head_init_scale\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, (nn.Dense, nn.Conv2d)):\n                cell.weight.set_data(\n                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)\n                )\n                if isinstance(cell, nn.Dense) and cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, (nn.LayerNorm)):\n                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))\n        self.head.weight.set_data(self.head.weight * self.head_init_scale)\n        self.head.bias.set_data(self.head.bias * self.head_init_scale)\n\n    def forward_features(self, x):\n        x = self.downsample_layers[0](x)\n        x = self.stages[0](x)\n        if self.pos_embd is not None:\n            B, C, H, W = x.shape\n            x = x + self.pos_embd(B, H, W)\n        for i in range(1, 4):\n            x = self.downsample_layers[i](x)\n            x = self.stages[i](x)\n        return self.norm(x.mean([-2, -1]))  # Global average pooling, (N, C, H, W) -> (N, C)\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.head(self.head_dropout(x))\n        return x\n
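Two details of EdgeNeXt.__init__ above are worth spelling out: the drop-path rate rises linearly over all blocks (np.linspace over sum(depths)), and within stage i only the last global_block[i] blocks become SDTA encoders (the j > depths[i] - global_block[i] - 1 test). The sketch below mirrors that selection logic for the default configuration; it only prints a plan and builds nothing.

import numpy as np

def edgenext_block_plan(depths=(3, 3, 9, 3), global_block=(0, 1, 1, 1),
                        global_block_type=("None", "SDTA", "SDTA", "SDTA"),
                        drop_path_rate=0.1):
    """Mirror EdgeNeXt's block selection and drop-path schedule."""
    dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
    plan, cur = [], 0
    for i, depth in enumerate(depths):
        stage = []
        for j in range(depth):
            # The last global_block[i] blocks of the stage use the global block type.
            kind = global_block_type[i] if j > depth - global_block[i] - 1 else "Conv"
            stage.append((kind, round(dp_rates[cur + j], 3)))
        plan.append(stage)
        cur += depth
    return plan

for i, stage in enumerate(edgenext_block_plan()):
    print(f"stage {i}: {stage}")
# e.g. stage 1 ends with a single ('SDTA', ...) entry, matching global_block=[0, 1, 1, 1].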
"},{"location":"reference/models/#mindcv.models.edgenext.edgenext_base","title":"mindcv.models.edgenext.edgenext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the edgenext_base model. Refer to the base class models.EdgeNeXt for more details.

Source code in mindcv/models/edgenext.py
@register_model\ndef edgenext_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:\n\"\"\"Get edgenext_base model.\n    Refer to the base class `models.EdgeNeXt` for more details.\"\"\"\n    default_cfg = default_cfgs[\"edgenext_base\"]\n    model = EdgeNeXt(\n        depths=[3, 3, 9, 3],\n        dims=[80, 160, 288, 584],\n        expan_ratio=4,\n        num_classes=num_classes,\n        global_block=[0, 1, 1, 1],\n        global_block_type=[\"None\", \"SDTA\", \"SDTA\", \"SDTA\"],\n        use_pos_embd_xca=[False, True, False, False],\n        kernel_sizes=[3, 5, 7, 9],\n        d2_scales=[2, 2, 3, 4],\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.edgenext.edgenext_small","title":"mindcv.models.edgenext.edgenext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the edgenext_small model. Refer to the base class models.EdgeNeXt for more details.

Source code in mindcv/models/edgenext.py
@register_model\ndef edgenext_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:\n\"\"\"Get edgenext_small model.\n    Refer to the base class `models.EdgeNeXt` for more details.\"\"\"\n    default_cfg = default_cfgs[\"edgenext_small\"]\n    model = EdgeNeXt(\n        depths=[3, 3, 9, 3],\n        dims=[48, 96, 160, 304],\n        expan_ratio=4,\n        num_classes=num_classes,\n        global_block=[0, 1, 1, 1],\n        global_block_type=[\"None\", \"SDTA\", \"SDTA\", \"SDTA\"],\n        use_pos_embd_xca=[False, True, False, False],\n        kernel_sizes=[3, 5, 7, 9],\n        d2_scales=[2, 2, 3, 4],\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.edgenext.edgenext_x_small","title":"mindcv.models.edgenext.edgenext_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the edgenext_x_small model. Refer to the base class models.EdgeNeXt for more details.

Source code in mindcv/models/edgenext.py
@register_model\ndef edgenext_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:\n\"\"\"Get edgenext_x_small model.\n    Refer to the base class `models.EdgeNeXt` for more details.\"\"\"\n    default_cfg = default_cfgs[\"edgenext_x_small\"]\n    model = EdgeNeXt(\n        depths=[3, 3, 9, 3],\n        dims=[32, 64, 100, 192],\n        expan_ratio=4,\n        num_classes=num_classes,\n        global_block=[0, 1, 1, 1],\n        global_block_type=[\"None\", \"SDTA\", \"SDTA\", \"SDTA\"],\n        use_pos_embd_xca=[False, True, False, False],\n        kernel_sizes=[3, 5, 7, 9],\n        heads=[4, 4, 4, 4],\n        d2_scales=[2, 2, 3, 4],\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.edgenext.edgenext_xx_small","title":"mindcv.models.edgenext.edgenext_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the edgenext_xx_small model. Refer to the base class models.EdgeNeXt for more details.

Source code in mindcv/models/edgenext.py
@register_model\ndef edgenext_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:\n\"\"\"Get edgenext_xx_small model.\n        Refer to the base class `models.EdgeNeXt` for more details.\"\"\"\n    default_cfg = default_cfgs[\"edgenext_xx_small\"]\n    model = EdgeNeXt(\n        depths=[2, 2, 6, 2],\n        dims=[24, 48, 88, 168],\n        expan_ratio=4,\n        num_classes=num_classes,\n        global_block=[0, 1, 1, 1],\n        global_block_type=['None', 'SDTA', 'SDTA', 'SDTA'],\n        use_pos_embd_xca=[False, True, False, False],\n        kernel_sizes=[3, 5, 7, 9],\n        heads=[4, 4, 4, 4],\n        d2_scales=[2, 2, 3, 4],\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#efficientnet","title":"efficientnet","text":""},{"location":"reference/models/#mindcv.models.efficientnet.EfficientNet","title":"mindcv.models.efficientnet.EfficientNet","text":"

Bases: nn.Cell

EfficientNet architecture. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION arch

The name of the model.

TYPE: str

dropout_rate

The dropout rate of efficientnet.

TYPE: float

width_mult

The width multiplier for the number of channels. Default: 1.0.

TYPE: float DEFAULT: 1.0

depth_mult

The depth multiplier for the number of layers. Default: 1.0.

TYPE: float DEFAULT: 1.0

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

inverted_residual_setting

The settings of block. Default: None.

TYPE: Sequence[Union[MBConvConfig, FusedMBConvConfig]] DEFAULT: None

drop_path_prob

The drop path rate of MBConv. Default: 0.2.

TYPE: float DEFAULT: 0.2

norm_layer

The normalization layer. Default: None.

TYPE: nn.Cell DEFAULT: None

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, 1000).

Source code in mindcv/models/efficientnet.py
class EfficientNet(nn.Cell):\n\"\"\"\n    EfficientNet architecture.\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        arch (str): The name of the model.\n        dropout_rate (float): The dropout rate of efficientnet.\n        width_mult (float): The ratio of the channel. Default: 1.0.\n        depth_mult (float): The ratio of num_layers. Default: 1.0.\n        in_channels (int): The input channels. Default: 3.\n        num_classes (int): The number of class. Default: 1000.\n        inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]], optional): The settings of block.\n            Default: None.\n        drop_path_prob (float): The drop path rate of MBConv. Default: 0.2.\n        norm_layer (nn.Cell, optional): The normalization layer. Default: None.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, 1000)`.\n    \"\"\"\n\n    def __init__(\n        self,\n        arch: str,\n        dropout_rate: float,\n        width_mult: float = 1.0,\n        depth_mult: float = 1.0,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n        inverted_residual_setting: Optional[Sequence[Union[MBConvConfig, FusedMBConvConfig]]] = None,\n        drop_path_prob: float = 0.2,\n        norm_layer: Optional[nn.Cell] = None,\n    ) -> None:\n        super().__init__()\n        self.last_channel = None\n\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n            if width_mult >= 1.6:\n                norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.99)\n\n        layers: List[nn.Cell] = []\n\n        if not inverted_residual_setting:\n            if arch.startswith(\"efficientnet_b\"):\n                bneck_conf = partial(MBConvConfig, width_cnf=width_mult, depth_cnf=depth_mult)\n                inverted_residual_setting = [\n                    bneck_conf(1, 3, 1, 32, 16, 1),\n                    bneck_conf(6, 3, 2, 16, 24, 2),\n                    bneck_conf(6, 5, 2, 24, 40, 2),\n                    bneck_conf(6, 3, 2, 40, 80, 3),\n                    bneck_conf(6, 5, 1, 80, 112, 3),\n                    bneck_conf(6, 5, 2, 112, 192, 4),\n                    bneck_conf(6, 3, 1, 192, 320, 1),\n                ]\n            elif arch.startswith(\"efficientnet_v2_s\"):\n                inverted_residual_setting = [\n                    FusedMBConvConfig(1, 3, 1, 24, 24, 2),\n                    FusedMBConvConfig(4, 3, 2, 24, 48, 4),\n                    FusedMBConvConfig(4, 3, 2, 48, 64, 4),\n                    MBConvConfig(4, 3, 2, 64, 128, 6),\n                    MBConvConfig(6, 3, 1, 128, 160, 9),\n                    MBConvConfig(6, 3, 2, 160, 256, 15),\n                ]\n                self.last_channel = 1280\n            elif arch.startswith(\"efficientnet_v2_m\"):\n                inverted_residual_setting = [\n                    FusedMBConvConfig(1, 3, 1, 24, 24, 3),\n                    FusedMBConvConfig(4, 3, 2, 24, 48, 5),\n                    FusedMBConvConfig(4, 3, 2, 48, 80, 5),\n                    MBConvConfig(4, 3, 2, 80, 160, 7),\n                    MBConvConfig(6, 3, 1, 160, 176, 14),\n                    MBConvConfig(6, 3, 2, 176, 304, 18),\n                    MBConvConfig(6, 3, 1, 304, 512, 5),\n                ]\n                self.last_channel = 1280\n            elif arch.startswith(\"efficientnet_v2_l\"):\n            
    inverted_residual_setting = [\n                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),\n                    FusedMBConvConfig(4, 3, 2, 32, 64, 7),\n                    FusedMBConvConfig(4, 3, 2, 64, 96, 7),\n                    MBConvConfig(4, 3, 2, 96, 192, 10),\n                    MBConvConfig(6, 3, 1, 192, 224, 19),\n                    MBConvConfig(6, 3, 2, 224, 384, 25),\n                    MBConvConfig(6, 3, 1, 384, 640, 7),\n                ]\n                self.last_channel = 1280\n            elif arch.startswith(\"efficientnet_v2_xl\"):\n                inverted_residual_setting = [\n                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),\n                    FusedMBConvConfig(4, 3, 2, 32, 64, 8),\n                    FusedMBConvConfig(4, 3, 2, 64, 96, 8),\n                    MBConvConfig(4, 3, 2, 96, 192, 16),\n                    MBConvConfig(6, 3, 1, 192, 256, 24),\n                    MBConvConfig(6, 3, 2, 256, 512, 32),\n                    MBConvConfig(6, 3, 1, 512, 640, 8),\n                ]\n                self.last_channel = 1280\n\n        # building first layer\n        firstconv_output_channels = inverted_residual_setting[0].input_channels\n        layers.extend([\n            nn.Conv2d(in_channels, firstconv_output_channels, kernel_size=3, stride=2),\n            norm_layer(firstconv_output_channels),\n            Swish(),\n        ])\n\n        total_reduction = 2\n        self.feature_info = [dict(chs=firstconv_output_channels, reduction=total_reduction,\n                                  name=f'features.{len(layers) - 1}')]\n\n        # building MBConv blocks\n        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)\n        stage_block_id = 0\n\n        # cnf is the settings of block\n        for cnf in inverted_residual_setting:\n            stage: List[nn.Cell] = []\n\n            # cnf.num_layers is the num of the same block\n            for _ in range(cnf.num_layers):\n                # copy to avoid modifications. 
shallow copy is enough\n                block_cnf = copy.copy(cnf)\n\n                block = MBConv\n\n                if \"FusedMBConvConfig\" in str(type(block_cnf)):\n                    block = FusedMBConv\n\n                # overwrite info if not the first conv in the stage\n                if stage:\n                    block_cnf.input_channels = block_cnf.out_channels\n                    block_cnf.stride = 1\n\n                # adjust dropout rate of blocks based on the depth of the stage block\n                sd_prob = drop_path_prob * float(stage_block_id) / total_stage_blocks\n\n                total_reduction *= block_cnf.stride\n\n                stage.append(block(block_cnf, sd_prob, norm_layer))\n                stage_block_id += 1\n\n            layers.append(nn.SequentialCell(stage))\n\n            self.feature_info.append(dict(chs=cnf.out_channels, reduction=total_reduction,\n                                          name=f'features.{len(layers) - 1}'))\n\n        # building last several layers\n        lastconv_input_channels = inverted_residual_setting[-1].out_channels\n        lastconv_output_channels = self.last_channel if self.last_channel is not None else 4 * lastconv_input_channels\n        layers.extend([\n            nn.Conv2d(lastconv_input_channels, lastconv_output_channels, kernel_size=1),\n            norm_layer(lastconv_output_channels),\n            Swish(),\n        ])\n\n        self.feature_info.append(dict(chs=lastconv_output_channels, reduction=total_reduction,\n                                      name=f'features.{len(layers) - 1}'))\n        self.flatten_sequential = True\n\n        self.features = nn.SequentialCell(layers)\n        self.avgpool = GlobalAvgPooling()\n        self.dropout = Dropout(p=dropout_rate)\n        self.mlp_head = nn.Dense(lastconv_output_channels, num_classes)\n        self._initialize_weights()\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n\n        x = self.avgpool(x)\n\n        if self.training:\n            x = self.dropout(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.mlp_head(x)\n\n    def construct(self, x: Tensor) -> Tensor:\n\"\"\"construct\"\"\"\n        x = self.forward_features(x)\n        return self.forward_head(x)\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                init_range = 1.0 / np.sqrt(cell.weight.shape[0])\n                cell.weight.set_data(weight_init.initializer(Uniform(init_range), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n            if isinstance(cell, nn.Conv2d):\n                out_channel, _, kernel_size_h, kernel_size_w = cell.weight.shape\n                stddev = np.sqrt(2 / int(out_channel * kernel_size_h * kernel_size_w))\n                cell.weight.set_data(\n                    weight_init.initializer(Normal(sigma=stddev), cell.weight.shape, cell.weight.dtype)\n                )\n                if cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n
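One implementation detail of the block loop above deserves a note: the stochastic-depth probability of each MBConv/FusedMBConv grows linearly with its global index, sd_prob = drop_path_prob * stage_block_id / total_stage_blocks. The sketch below reproduces that schedule for the efficientnet_b0 stage layout (1, 2, 2, 3, 3, 4, 1 blocks per stage); it is illustrative arithmetic only.

def drop_path_schedule(blocks_per_stage=(1, 2, 2, 3, 3, 4, 1), drop_path_prob=0.2):
    """Per-block stochastic-depth probabilities, as computed in EfficientNet.__init__."""
    total = sum(blocks_per_stage)
    probs, block_id = [], 0
    for n in blocks_per_stage:
        for _ in range(n):
            probs.append(drop_path_prob * block_id / total)
            block_id += 1
    return probs

print(drop_path_schedule())   # 16 values rising from 0.0 to 0.1875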
"},{"location":"reference/models/#mindcv.models.efficientnet.EfficientNet.construct","title":"mindcv.models.efficientnet.EfficientNet.construct(x)","text":"

construct

Source code in mindcv/models/efficientnet.py
def construct(self, x: Tensor) -> Tensor:\n\"\"\"construct\"\"\"\n    x = self.forward_features(x)\n    return self.forward_head(x)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b0","title":"mindcv.models.efficientnet.efficientnet_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B0 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B0 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b0\", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
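For quick reference, the eight B-series builders on this page only differ in the three scalars they forward to the internal _efficientnet helper: width multiplier, depth multiplier and classifier dropout. The dictionary below is transcribed directly from the builder bodies shown here and is provided purely for convenience.

# (width_mult, depth_mult, dropout_rate) as passed by each builder on this page.
EFFICIENTNET_B_COEFFS = {
    "efficientnet_b0": (1.0, 1.0, 0.2),
    "efficientnet_b1": (1.0, 1.1, 0.2),
    "efficientnet_b2": (1.1, 1.2, 0.3),
    "efficientnet_b3": (1.2, 1.4, 0.3),
    "efficientnet_b4": (1.4, 1.8, 0.4),
    "efficientnet_b5": (1.6, 2.2, 0.4),
    "efficientnet_b6": (1.8, 2.6, 0.5),
    "efficientnet_b7": (2.0, 3.1, 0.5),
}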
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b1","title":"mindcv.models.efficientnet.efficientnet_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B1 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B1 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b1\", 1.0, 1.1, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b2","title":"mindcv.models.efficientnet.efficientnet_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B2 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B2 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b2\", 1.1, 1.2, 0.3, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b3","title":"mindcv.models.efficientnet.efficientnet_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B3 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B3 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b3\", 1.2, 1.4, 0.3, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b4","title":"mindcv.models.efficientnet.efficientnet_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B4 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B4 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b4\", 1.4, 1.8, 0.4, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b5","title":"mindcv.models.efficientnet.efficientnet_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B5 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B5 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b5\", 1.6, 2.2, 0.4, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b6","title":"mindcv.models.efficientnet.efficientnet_b6(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B6 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>.

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b6(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B6 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b6\", 1.8, 2.6, 0.5, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_b7","title":"mindcv.models.efficientnet.efficientnet_b7(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNet B7 architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks (https://arxiv.org/abs/1905.11946).

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_b7(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B7 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_b7\", 2.0, 3.1, 0.5, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_v2_l","title":"mindcv.models.efficientnet.efficientnet_v2_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNetV2-L architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks (https://arxiv.org/abs/1905.11946).

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_v2_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B4 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_v2_l\", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_v2_m","title":"mindcv.models.efficientnet.efficientnet_v2_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNetV2-M architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks (https://arxiv.org/abs/1905.11946).

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_v2_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B4 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_v2_m\", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_v2_s","title":"mindcv.models.efficientnet.efficientnet_v2_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNetV2-S architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks (https://arxiv.org/abs/1905.11946).

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_v2_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B4 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_v2_s\", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.efficientnet.efficientnet_v2_xl","title":"mindcv.models.efficientnet.efficientnet_v2_xl(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Constructs an EfficientNetV2-XL architecture from EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks (https://arxiv.org/abs/1905.11946).

PARAMETER DESCRIPTION pretrained

If True, returns a model pretrained on IMAGENET. Default: False.

TYPE: bool DEFAULT: False

num_classes

The number of classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

The input channels. Default: 3.

TYPE: int DEFAULT: 3

Inputs
  • x (Tensor) - Tensor of shape (N, C_{in}, H_{in}, W_{in}).
Outputs

Tensor of shape (N, CLASSES_{out}).

Source code in mindcv/models/efficientnet.py
@register_model\ndef efficientnet_v2_xl(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:\n\"\"\"\n    Constructs a EfficientNet B4 architecture from\n    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.\n\n    Args:\n        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.\n        num_classes (int): The numbers of classes. Default: 1000.\n        in_channels (int): The input channels. Default: 1000.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`.\n    \"\"\"\n    return _efficientnet(\"efficientnet_v2_xl\", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)\n
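The efficientnet_v2_s/m/l/xl entry points take the same arguments as the B-series, so switching variants is a one-line change. The sketch below is illustrative only: the use of mindcv.create_model to resolve the registered name is an assumption about the package-level factory fed by @register_model, and the 224x224 resolution is a placeholder.

import numpy as np
import mindspore as ms
import mindcv
from mindcv.models.efficientnet import efficientnet_v2_s

# Direct constructor call, same signature as the B-series variants.
net_s = efficientnet_v2_s(pretrained=False, num_classes=10, in_channels=3)
net_s.set_train(False)

# Alternatively, resolve the registered name through the factory (assumed mindcv.create_model API).
net_m = mindcv.create_model("efficientnet_v2_m", num_classes=10, pretrained=False)
net_m.set_train(False)

x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(net_s(x).shape, net_m(x).shape)  # (2, 10) (2, 10)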
"},{"location":"reference/models/#features","title":"features","text":""},{"location":"reference/models/#ghostnet","title":"ghostnet","text":""},{"location":"reference/models/#mindcv.models.ghostnet.GhostNet","title":"mindcv.models.ghostnet.GhostNet","text":"

Bases: nn.Cell

GhostNet model class, based on \"GhostNet: More Features from Cheap Operations\" (https://arxiv.org/abs/1911.11907).

PARAMETER DESCRIPTION num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

width

base width of hidden channel in blocks. Default: 1.0.

TYPE: float DEFAULT: 1.0

in_channels

number of input channels. Default: 3.

TYPE: int DEFAULT: 3

drop_rate

the dropout probability applied to the features before classification. Default: 0.2.

TYPE: float DEFAULT: 0.2

Source code in mindcv/models/ghostnet.py
class GhostNet(nn.Cell):\nr\"\"\"GhostNet model class, based on\n    `\"GhostNet: More Features from Cheap Operations \" <https://arxiv.org/abs/1911.11907>`_.\n    Args:\n        num_classes: number of classification classes. Default: 1000.\n        width: base width of hidden channel in blocks. Default: 1.0.\n        in_channels: number of input channels. Default: 3.\n        drop_rate: the probability of the features before classification. Default: 0.2.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes: int = 1000,\n        width: float = 1.0,\n        in_channels: int = 3,\n        drop_rate: float = 0.2,\n    ) -> None:\n        super().__init__()\n        # setting of inverted residual blocks\n        self.num_classes = num_classes\n        self.drop_rate = drop_rate\n        self.cfgs = [\n            # k, t, c, SE, s\n            # stage1\n            [[3, 16, 16, 0, 1]],\n            # stage2\n            [[3, 48, 24, 0, 2]],\n            [[3, 72, 24, 0, 1]],\n            # stage3\n            [[5, 72, 40, 0.25, 2]],\n            [[5, 120, 40, 0.25, 1]],\n            # stage4\n            [[3, 240, 80, 0, 2]],\n            [[3, 200, 80, 0, 1],\n             [3, 184, 80, 0, 1],\n             [3, 184, 80, 0, 1],\n             [3, 480, 112, 0.25, 1],\n             [3, 672, 112, 0.25, 1]\n             ],\n            # stage5\n            [[5, 672, 160, 0.25, 2]],\n            [[5, 960, 160, 0, 1],\n             [5, 960, 160, 0.25, 1],\n             [5, 960, 160, 0, 1],\n             [5, 960, 160, 0.25, 1]\n             ]\n        ]\n\n        # building first layer\n        stem_chs = make_divisible(16 * width, 4)\n        self.conv_stem = nn.Conv2d(in_channels, stem_chs, 3, 2, pad_mode=\"pad\", padding=1, has_bias=False)\n        self.bn1 = nn.BatchNorm2d(stem_chs)\n        self.act1 = nn.ReLU()\n        prev_chs = stem_chs\n\n        # building inverted residual blocks\n        stages = []\n        for cfg in self.cfgs:\n            layers = []\n            for k, exp_size, c, se_ratio, s in cfg:\n                out_chs = make_divisible(c * width, 4)\n                mid_chs = make_divisible(exp_size * width, 4)\n                layers.append(GhostBottleneck(prev_chs, mid_chs, out_chs, k, s, se_ratio=se_ratio))\n                prev_chs = out_chs\n            stages.append(nn.SequentialCell(layers))\n\n        out_chs = make_divisible(exp_size * width, 4)\n        stages.append(ConvBnAct(prev_chs, out_chs, 1))\n        prev_chs = out_chs\n\n        self.blocks = nn.SequentialCell(stages)\n\n        # building last several layers\n        self.num_features = out_chs = 1280\n        self.global_pool = GlobalAvgPooling(keep_dims=True)\n        self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, pad_mode=\"pad\", padding=0, has_bias=True)\n        self.act2 = nn.ReLU()\n        self.flatten = nn.Flatten()\n        if self.drop_rate > 0.0:\n            self.dropout = Dropout(p=drop_rate)\n        self.classifier = nn.Dense(out_chs, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n     
           cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.conv_stem(x)\n        x = self.bn1(x)\n        x = self.act1(x)\n        x = self.blocks(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.global_pool(x)\n        x = self.conv_head(x)\n        x = self.act2(x)\n        x = self.flatten(x)\n        if self.drop_rate > 0.0:\n            x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.ghostnet.ghostnet_050","title":"mindcv.models.ghostnet.ghostnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

GhostNet-0.5x

Source code in mindcv/models/ghostnet.py
@register_model\ndef ghostnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\" GhostNet-0.5x \"\"\"\n    default_cfg = default_cfgs[\"ghostnet_050\"]\n    model = GhostNet(width=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.ghostnet.ghostnet_100","title":"mindcv.models.ghostnet.ghostnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

GhostNet-1.0x

Source code in mindcv/models/ghostnet.py
@register_model\ndef ghostnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\" GhostNet-1.0x \"\"\"\n    default_cfg = default_cfgs[\"ghostnet_100\"]\n    model = GhostNet(width=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.ghostnet.ghostnet_130","title":"mindcv.models.ghostnet.ghostnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

GhostNet-1.3x

Source code in mindcv/models/ghostnet.py
@register_model\ndef ghostnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\" GhostNet-1.3x \"\"\"\n    default_cfg = default_cfgs[\"ghostnet_130\"]\n    model = GhostNet(width=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
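ghostnet_050, ghostnet_100 and ghostnet_130 differ only in the width they pass to GhostNet, so one sketch covers all three. This is a hedged illustration assuming a standard 224x224 ImageNet-style input; drop_rate is forwarded to the GhostNet constructor through **kwargs.

import numpy as np
import mindspore as ms

from mindcv.models.ghostnet import ghostnet_100

# GhostNet-1.0x; the classifier head outputs num_classes logits.
net = ghostnet_100(pretrained=False, num_classes=1000, drop_rate=0.2)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)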
"},{"location":"reference/models/#hrnet","title":"hrnet","text":""},{"location":"reference/models/#mindcv.models.hrnet.HRNet","title":"mindcv.models.hrnet.HRNet","text":"

Bases: nn.Cell

HRNet Backbone, based on \"Deep High-Resolution Representation Learning for Visual Recognition\" (https://arxiv.org/abs/1908.07919).

PARAMETER DESCRIPTION stage_cfg

Configuration of the extra blocks. It accepts a dictionary storing the detailed config of each block, which includes num_modules, num_branches, block, num_blocks, and num_channels. For a detailed example, please check the implementations of hrnet_w32 and hrnet_w48.

TYPE: Dict[str, Dict[str, int]]

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

Number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

Source code in mindcv/models/hrnet.py
class HRNet(nn.Cell):\nr\"\"\"HRNet Backbone, based on\n    `\"Deep High-Resolution Representation Learning for Visual Recognition\"\n    <https://arxiv.org/abs/1908.07919>`_.\n\n    Args:\n        stage_cfg: Configuration of the extra blocks. It accepts a dictionay\n            storing the detail config of each block. which include `num_modules`,\n            `num_branches`, `block`, `num_blocks`, `num_channels`. For detail example,\n            please check the implementation of `hrnet_w32` and `hrnet_w48`.\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: Number the channels of the input. Default: 3.\n    \"\"\"\n\n    blocks_dict = {\"BASIC\": BasicBlock, \"BOTTLENECK\": Bottleneck}\n\n    def __init__(\n        self,\n        stage_cfg: Dict[str, Dict[str, int]],\n        num_classes: int = 1000,\n        in_channels: int = 3,\n    ) -> None:\n        super().__init__()\n\n        self.stage_cfg = stage_cfg\n        # stem net\n        self.conv1 = nn.Conv2d(\n            in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode=\"pad\"\n        )\n        self.bn1 = nn.BatchNorm2d(64)\n        self.conv2 = nn.Conv2d(\n            64, 64, kernel_size=3, stride=2, padding=1, pad_mode=\"pad\"\n        )\n        self.bn2 = nn.BatchNorm2d(64)\n        self.relu = nn.ReLU()\n\n        # stage 1\n        self.stage1_cfg = self.stage_cfg[\"stage1\"]\n        num_channels = self.stage1_cfg[\"num_channels\"][0]\n        num_blocks = self.stage1_cfg[\"num_blocks\"][0]\n        block = self.blocks_dict[self.stage1_cfg[\"block\"]]\n        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)\n\n        # stage 2\n        self.stage2_cfg = self.stage_cfg[\"stage2\"]\n        num_channels = self.stage2_cfg[\"num_channels\"]\n        block = self.blocks_dict[self.stage2_cfg[\"block\"]]\n        num_channels = [\n            num_channels[i] * block.expansion for i in range(len(num_channels))\n        ]\n\n        self.transition1, self.transition1_flags = self._make_transition_layer(\n            [256], num_channels\n        )\n        self.stage2, pre_stage_channels = self._make_stage(\n            self.stage2_cfg, num_channels\n        )\n\n        # stage 3\n        self.stage3_cfg = self.stage_cfg[\"stage3\"]\n        num_channels = self.stage3_cfg[\"num_channels\"]\n        block = self.blocks_dict[self.stage3_cfg[\"block\"]]\n        num_channels = [\n            num_channels[i] * block.expansion for i in range(len(num_channels))\n        ]\n\n        self.transition2, self.transition2_flags = self._make_transition_layer(\n            pre_stage_channels, num_channels\n        )\n        self.stage3, pre_stage_channels = self._make_stage(\n            self.stage3_cfg, num_channels\n        )\n\n        # stage 4\n        self.stage4_cfg = self.stage_cfg[\"stage4\"]\n        num_channels = self.stage4_cfg[\"num_channels\"]\n        block = self.blocks_dict[self.stage4_cfg[\"block\"]]\n        num_channels = [\n            num_channels[i] * block.expansion for i in range(len(num_channels))\n        ]\n        self.transition3, self.transition3_flags = self._make_transition_layer(\n            pre_stage_channels, num_channels\n        )\n        self.stage4, pre_stage_channels = self._make_stage(\n            self.stage4_cfg, num_channels\n        )\n\n        # head\n        self.pool = GlobalAvgPooling()\n        self.incre_modules, self.downsample_modules, self.final_layer = self._make_head(\n            pre_stage_channels\n        
)\n        self.classifier = nn.Dense(2048, num_classes)\n\n    def _make_head(self, pre_stage_channels: List[int]):\n        head_block = Bottleneck\n        head_channels = [32, 64, 128, 256]\n\n        # increase the #channesl on each resolution\n        # from C, 2C, 4C, 8C to 128, 256, 512, 1024\n        incre_modules = list()\n        for i, channels in enumerate(pre_stage_channels):\n            incre_module = self._make_layer(\n                head_block, channels, head_channels[i], 1, stride=1\n            )\n            incre_modules.append(incre_module)\n        incre_modules = nn.CellList(incre_modules)\n\n        # downsample modules\n        downsamp_modules = []\n        for i in range(len(pre_stage_channels) - 1):\n            in_channels = head_channels[i] * head_block.expansion\n            out_channels = head_channels[i + 1] * head_block.expansion\n\n            downsamp_module = nn.SequentialCell(\n                nn.Conv2d(\n                    in_channels=in_channels,\n                    out_channels=out_channels,\n                    kernel_size=3,\n                    stride=2,\n                    pad_mode=\"pad\",\n                    padding=1,\n                ),\n                nn.BatchNorm2d(out_channels),\n                nn.ReLU(),\n            )\n\n            downsamp_modules.append(downsamp_module)\n        downsamp_modules = nn.CellList(downsamp_modules)\n\n        final_layer = nn.SequentialCell(\n            nn.Conv2d(\n                in_channels=head_channels[3] * head_block.expansion,\n                out_channels=2048,\n                kernel_size=1,\n                stride=1,\n                padding=0,\n            ),\n            nn.BatchNorm2d(2048),\n            nn.ReLU(),\n        )\n\n        return incre_modules, downsamp_modules, final_layer\n\n    def _make_transition_layer(\n        self, num_channels_pre_layer: List[int], num_channels_cur_layer: List[int]\n    ) -> Tuple[nn.CellList, List[bool]]:\n        num_branches_cur = len(num_channels_cur_layer)\n        num_branches_pre = len(num_channels_pre_layer)\n\n        transition_layers = []\n        transition_layers_flags = []\n        for i in range(num_branches_cur):\n            if i < num_branches_pre:\n                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:\n                    transition_layers.append(\n                        nn.SequentialCell(\n                            nn.Conv2d(\n                                num_channels_pre_layer[i],\n                                num_channels_cur_layer[i],\n                                kernel_size=3,\n                                padding=1,\n                                pad_mode=\"pad\",\n                            ),\n                            nn.BatchNorm2d(num_channels_cur_layer[i]),\n                            nn.ReLU(),\n                        )\n                    )\n                    transition_layers_flags.append(True)\n                else:\n                    transition_layers.append(IdentityCell())\n                    transition_layers_flags.append(False)\n            else:\n                conv3x3s = []\n                for j in range(i + 1 - num_branches_pre):\n                    inchannels = num_channels_pre_layer[-1]\n                    outchannels = (\n                        num_channels_cur_layer[i]\n                        if j == i - num_branches_pre\n                        else inchannels\n                    )\n                    conv3x3s.append(\n                       
 nn.SequentialCell(\n                            [\n                                nn.Conv2d(\n                                    inchannels,\n                                    outchannels,\n                                    kernel_size=3,\n                                    stride=2,\n                                    padding=1,\n                                    pad_mode=\"pad\",\n                                ),\n                                nn.BatchNorm2d(outchannels),\n                                nn.ReLU(),\n                            ]\n                        )\n                    )\n                transition_layers.append(nn.SequentialCell(conv3x3s))\n                transition_layers_flags.append(True)\n\n        return nn.CellList(transition_layers), transition_layers_flags\n\n    def _make_layer(\n        self,\n        block: Type[Union[BasicBlock, Bottleneck]],\n        in_channels: int,\n        out_channels: int,\n        blocks: int,\n        stride: int = 1,\n    ) -> nn.SequentialCell:\n        downsample = None\n        if stride != 1 or in_channels != out_channels * block.expansion:\n            downsample = nn.SequentialCell(\n                nn.Conv2d(\n                    in_channels,\n                    out_channels * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                ),\n                nn.BatchNorm2d(out_channels * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(in_channels, out_channels, stride, down_sample=downsample))\n        for _ in range(1, blocks):\n            layers.append(block(out_channels * block.expansion, out_channels))\n\n        return nn.SequentialCell(layers)\n\n    def _make_stage(\n        self,\n        layer_config: Dict[str, int],\n        num_inchannels: int,\n        multi_scale_output: bool = True,\n    ) -> Tuple[nn.SequentialCell, List[int]]:\n        num_modules = layer_config[\"num_modules\"]\n        num_branches = layer_config[\"num_branches\"]\n        num_blocks = layer_config[\"num_blocks\"]\n        num_channels = layer_config[\"num_channels\"]\n        block = self.blocks_dict[layer_config[\"block\"]]\n\n        modules = []\n        for i in range(num_modules):\n            # multi_scale_output is only used last module\n            if not multi_scale_output and i == num_modules - 1:\n                reset_multi_scale_output = False\n            else:\n                reset_multi_scale_output = True\n\n            modules.append(\n                HRModule(\n                    num_branches,\n                    block,\n                    num_blocks,\n                    num_inchannels,\n                    num_channels,\n                    reset_multi_scale_output,\n                )\n            )\n            num_inchannels = modules[-1].num_inchannels\n\n        return nn.SequentialCell(modules), num_inchannels\n\n    def forward_features(self, x: Tensor) -> List[Tensor]:\n\"\"\"Perform the feature extraction.\n\n        Args:\n            x: Tensor\n\n        Returns:\n            Extracted feature\n        \"\"\"\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.conv2(x)\n        x = self.bn2(x)\n        x = self.relu(x)\n\n        # stage 1\n        x = self.layer1(x)\n\n        # stage 2\n        x_list = []\n        for i in range(self.stage2_cfg[\"num_branches\"]):\n            if self.transition1_flags[i]:\n                
x_list.append(self.transition1[i](x))\n            else:\n                x_list.append(x)\n        y_list = self.stage2(x_list)\n\n        # stage 3\n        x_list = []\n        for i in range(self.stage3_cfg[\"num_branches\"]):\n            if self.transition2_flags[i]:\n                x_list.append(self.transition2[i](y_list[-1]))\n            else:\n                x_list.append(y_list[i])\n        y_list = self.stage3(x_list)\n\n        # stage 4\n        x_list = []\n        for i in range(self.stage4_cfg[\"num_branches\"]):\n            if self.transition3_flags[i]:\n                x_list.append(self.transition3[i](y_list[-1]))\n            else:\n                x_list.append(y_list[i])\n        y = self.stage4(x_list)\n\n        return y\n\n    def forward_head(self, x: List[Tensor]) -> Tensor:\n        y = self.incre_modules[0](x[0])\n        for i in range(len(self.downsample_modules)):\n            y = self.incre_modules[i + 1](x[i + 1]) + self.downsample_modules[i](y)\n\n        y = self.final_layer(y)\n        y = self.pool(y)\n        y = self.classifier(y)\n        return y\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.hrnet.HRNet.forward_features","title":"mindcv.models.hrnet.HRNet.forward_features(x)","text":"

Perform the feature extraction.

PARAMETER DESCRIPTION x

Tensor

TYPE: Tensor

RETURNS DESCRIPTION List[Tensor]

Extracted feature

Source code in mindcv/models/hrnet.py
def forward_features(self, x: Tensor) -> List[Tensor]:\n\"\"\"Perform the feature extraction.\n\n    Args:\n        x: Tensor\n\n    Returns:\n        Extracted feature\n    \"\"\"\n    x = self.conv1(x)\n    x = self.bn1(x)\n    x = self.relu(x)\n    x = self.conv2(x)\n    x = self.bn2(x)\n    x = self.relu(x)\n\n    # stage 1\n    x = self.layer1(x)\n\n    # stage 2\n    x_list = []\n    for i in range(self.stage2_cfg[\"num_branches\"]):\n        if self.transition1_flags[i]:\n            x_list.append(self.transition1[i](x))\n        else:\n            x_list.append(x)\n    y_list = self.stage2(x_list)\n\n    # stage 3\n    x_list = []\n    for i in range(self.stage3_cfg[\"num_branches\"]):\n        if self.transition2_flags[i]:\n            x_list.append(self.transition2[i](y_list[-1]))\n        else:\n            x_list.append(y_list[i])\n    y_list = self.stage3(x_list)\n\n    # stage 4\n    x_list = []\n    for i in range(self.stage4_cfg[\"num_branches\"]):\n        if self.transition3_flags[i]:\n            x_list.append(self.transition3[i](y_list[-1]))\n        else:\n            x_list.append(y_list[i])\n    y = self.stage4(x_list)\n\n    return y\n
"},{"location":"reference/models/#mindcv.models.hrnet.hrnet_w32","title":"mindcv.models.hrnet.hrnet_w32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the HRNet model with width=32. Refer to the base class models.HRNet for more details.

PARAMETER DESCRIPTION pretrained

Whether the model is pretrained. Default: False

TYPE: bool DEFAULT: False

num_classes

number of classification classes. Default: 1000

TYPE: int DEFAULT: 1000

in_channels

Number of input channels. Default: 3

TYPE: int DEFAULT: 3

RETURNS DESCRIPTION Union[HRNet, HRNetFeatures]

HRNet model

Source code in mindcv/models/hrnet.py
@register_model\ndef hrnet_w32(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> Union[HRNet, HRNetFeatures]:\n\"\"\"Get HRNet with width=32 model.\n    Refer to the base class `models.HRNet` for more details.\n\n    Args:\n        pretrained: Whether the model is pretrained. Default: False\n        num_classes: number of classification classes. Default: 1000\n        in_channels: Number of input channels. Default: 3\n\n    Returns:\n        HRNet model\n    \"\"\"\n    default_cfg = default_cfgs[\"hrnet_w32\"]\n    stage_cfg = dict(\n        stage1=dict(\n            num_modules=1,\n            num_branches=1,\n            block=\"BOTTLENECK\",\n            num_blocks=[4],\n            num_channels=[64],\n        ),\n        stage2=dict(\n            num_modules=1,\n            num_branches=2,\n            block=\"BASIC\",\n            num_blocks=[4, 4],\n            num_channels=[32, 64],\n        ),\n        stage3=dict(\n            num_modules=4,\n            num_branches=3,\n            block=\"BASIC\",\n            num_blocks=[4, 4, 4],\n            num_channels=[32, 64, 128],\n        ),\n        stage4=dict(\n            num_modules=3,\n            num_branches=4,\n            block=\"BASIC\",\n            num_blocks=[4, 4, 4, 4],\n            num_channels=[32, 64, 128, 256],\n        ),\n    )\n    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)\n    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
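hrnet_w32 assembles the stage_cfg dictionary (num_modules, num_branches, block, num_blocks, num_channels) internally, so only the usual classification arguments are exposed. The sketch below is an illustration; the 224x224 resolution is assumed rather than documented.

import numpy as np
import mindspore as ms

from mindcv.models.hrnet import hrnet_w32

# The four-stage HRNet-W32 config is built inside the factory function.
net = hrnet_w32(pretrained=False, num_classes=1000, in_channels=3)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)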
"},{"location":"reference/models/#mindcv.models.hrnet.hrnet_w48","title":"mindcv.models.hrnet.hrnet_w48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the HRNet model with width=48. Refer to the base class models.HRNet for more details.

PARAMETER DESCRIPTION pretrained

Whether the model is pretrained. Default: False

TYPE: bool DEFAULT: False

num_classes

number of classification classes. Default: 1000

TYPE: int DEFAULT: 1000

in_channels

Number of input channels. Default: 3

TYPE: int DEFAULT: 3

RETURNS DESCRIPTION Union[HRNet, HRNetFeatures]

HRNet model

Source code in mindcv/models/hrnet.py
@register_model\ndef hrnet_w48(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> Union[HRNet, HRNetFeatures]:\n\"\"\"Get HRNet with width=48 model.\n    Refer to the base class `models.HRNet` for more details.\n\n    Args:\n        pretrained: Whether the model is pretrained. Default: False\n        num_classes: number of classification classes. Default: 1000\n        in_channels: Number of input channels. Default: 3\n\n    Returns:\n        HRNet model\n    \"\"\"\n    default_cfg = default_cfgs[\"hrnet_w48\"]\n    stage_cfg = dict(\n        stage1=dict(\n            num_modules=1,\n            num_branches=1,\n            block=\"BOTTLENECK\",\n            num_blocks=[4],\n            num_channels=[64],\n        ),\n        stage2=dict(\n            num_modules=1,\n            num_branches=2,\n            block=\"BASIC\",\n            num_blocks=[4, 4],\n            num_channels=[48, 96],\n        ),\n        stage3=dict(\n            num_modules=4,\n            num_branches=3,\n            block=\"BASIC\",\n            num_blocks=[4, 4, 4],\n            num_channels=[48, 96, 192],\n        ),\n        stage4=dict(\n            num_modules=3,\n            num_branches=4,\n            block=\"BASIC\",\n            num_blocks=[4, 4, 4, 4],\n            num_channels=[48, 96, 192, 384],\n        ),\n    )\n    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)\n    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#inceptionv3","title":"inceptionv3","text":""},{"location":"reference/models/#mindcv.models.inceptionv3.InceptionV3","title":"mindcv.models.inceptionv3.InceptionV3","text":"

Bases: nn.Cell

Inception v3 model architecture from \"Rethinking the Inception Architecture for Computer Vision\" (https://arxiv.org/abs/1512.00567).

Note: in contrast to the other models, inception_v3 expects tensors with a size of N x 3 x 299 x 299, so ensure your images are sized accordingly.

PARAMETER DESCRIPTION num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

aux_logits

use auxiliary classifier or not. Default: True.

TYPE: bool DEFAULT: True

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

drop_rate

dropout rate of the layer before main classifier. Default: 0.2.

TYPE: float DEFAULT: 0.2

Source code in mindcv/models/inceptionv3.py
class InceptionV3(nn.Cell):\nr\"\"\"Inception v3 model architecture from\n    `\"Rethinking the Inception Architecture for Computer Vision\" <https://arxiv.org/abs/1512.00567>`_.\n\n    .. note::\n        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of\n        N x 3 x 299 x 299, so ensure your images are sized accordingly.\n\n    Args:\n        num_classes: number of classification classes. Default: 1000.\n        aux_logits: use auxiliary classifier or not. Default: False.\n        in_channels: number the channels of the input. Default: 3.\n        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes: int = 1000,\n        aux_logits: bool = True,\n        in_channels: int = 3,\n        drop_rate: float = 0.2,\n    ) -> None:\n        super().__init__()\n        self.aux_logits = aux_logits\n        self.conv1a = BasicConv2d(in_channels, 32, kernel_size=3, stride=2, pad_mode=\"valid\")\n        self.conv2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode=\"valid\")\n        self.conv2b = BasicConv2d(32, 64, kernel_size=3, stride=1)\n        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)\n        self.conv3b = BasicConv2d(64, 80, kernel_size=1)\n        self.conv4a = BasicConv2d(80, 192, kernel_size=3, pad_mode=\"valid\")\n        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)\n        self.inception5b = InceptionA(192, pool_features=32)\n        self.inception5c = InceptionA(256, pool_features=64)\n        self.inception5d = InceptionA(288, pool_features=64)\n        self.inception6a = InceptionB(288)\n        self.inception6b = InceptionC(768, channels_7x7=128)\n        self.inception6c = InceptionC(768, channels_7x7=160)\n        self.inception6d = InceptionC(768, channels_7x7=160)\n        self.inception6e = InceptionC(768, channels_7x7=192)\n        if self.aux_logits:\n            self.aux = InceptionAux(768, num_classes)\n        self.inception7a = InceptionD(768)\n        self.inception7b = InceptionE(1280)\n        self.inception7c = InceptionE(2048)\n\n        self.pool = GlobalAvgPooling()\n        self.dropout = Dropout(p=drop_rate)\n        self.num_features = 2048\n        self.classifier = nn.Dense(self.num_features, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))\n\n    def forward_preaux(self, x: Tensor) -> Tensor:\n        x = self.conv1a(x)\n        x = self.conv2a(x)\n        x = self.conv2b(x)\n        x = self.maxpool1(x)\n        x = self.conv3b(x)\n        x = self.conv4a(x)\n        x = self.maxpool2(x)\n        x = self.inception5b(x)\n        x = self.inception5c(x)\n        x = self.inception5d(x)\n        x = self.inception6a(x)\n        x = self.inception6b(x)\n        x = self.inception6c(x)\n        x = self.inception6d(x)\n        x = self.inception6e(x)\n        return x\n\n    def forward_postaux(self, x: Tensor) -> Tensor:\n        x = self.inception7a(x)\n        x = self.inception7b(x)\n        x = self.inception7c(x)\n        return x\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.forward_preaux(x)\n        x = self.forward_postaux(x)\n        return 
x\n\n    def construct(self, x: Tensor) -> Union[Tensor, Tuple[Tensor, Tensor]]:\n        x = self.forward_preaux(x)\n        if self.training and self.aux_logits:\n            aux = self.aux(x)\n        else:\n            aux = None\n        x = self.forward_postaux(x)\n\n        x = self.pool(x)\n        x = self.dropout(x)\n        x = self.classifier(x)\n\n        if self.training and self.aux_logits:\n            return x, aux\n        return x\n
"},{"location":"reference/models/#mindcv.models.inceptionv3.inception_v3","title":"mindcv.models.inceptionv3.inception_v3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get InceptionV3 model. Refer to the base class models.InceptionV3 for more details.

Source code in mindcv/models/inceptionv3.py
@register_model\ndef inception_v3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV3:\n\"\"\"Get InceptionV3 model.\n    Refer to the base class `models.InceptionV3` for more details.\"\"\"\n    default_cfg = default_cfgs[\"inception_v3\"]\n    model = InceptionV3(num_classes=num_classes, aux_logits=True, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
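Because inception_v3 builds the network with aux_logits=True, it returns a (logits, aux_logits) tuple in training mode and a single logits tensor in evaluation mode, and it expects 299x299 inputs as noted above. A minimal sketch of both modes, using random data purely for shape checking:

import numpy as np
import mindspore as ms

from mindcv.models.inceptionv3 import inception_v3

net = inception_v3(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(2, 3, 299, 299), ms.float32)  # InceptionV3 expects 299x299 inputs

net.set_train(True)
logits, aux = net(x)   # training mode: main and auxiliary logits
net.set_train(False)
logits = net(x)        # evaluation mode: main logits only
print(logits.shape)    # (2, 1000)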
"},{"location":"reference/models/#inceptionv4","title":"inceptionv4","text":""},{"location":"reference/models/#mindcv.models.inceptionv4.InceptionV4","title":"mindcv.models.inceptionv4.InceptionV4","text":"

Bases: nn.Cell

Inception v4 model architecture from \"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning\" (https://arxiv.org/abs/1602.07261).

PARAMETER DESCRIPTION num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

drop_rate

dropout rate of the layer before main classifier. Default: 0.2.

TYPE: float DEFAULT: 0.2

Source code in mindcv/models/inceptionv4.py
class InceptionV4(nn.Cell):\nr\"\"\"Inception v4 model architecture from\n    `\"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning\" <https://arxiv.org/abs/1602.07261>`_.  # noqa: E501\n\n    Args:\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number the channels of the input. Default: 3.\n        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        drop_rate: float = 0.2,\n    ) -> None:\n        super().__init__()\n        blocks = [Stem(in_channels)]\n        for _ in range(4):\n            blocks.append(InceptionA())\n        blocks.append(ReductionA())\n        for _ in range(7):\n            blocks.append(InceptionB())\n        blocks.append(ReductionB())\n        for _ in range(3):\n            blocks.append(InceptionC())\n        self.features = nn.SequentialCell(blocks)\n\n        self.pool = GlobalAvgPooling()\n        self.dropout = Dropout(p=drop_rate)\n        self.num_features = 1536\n        self.classifier = nn.Dense(self.num_features, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.inceptionv4.inception_v4","title":"mindcv.models.inceptionv4.inception_v4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get InceptionV4 model. Refer to the base class models.InceptionV4 for more details.

Source code in mindcv/models/inceptionv4.py
@register_model\ndef inception_v4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV4:\n\"\"\"Get InceptionV4 model.\n    Refer to the base class `models.InceptionV4` for more details.\"\"\"\n    default_cfg = default_cfgs[\"inception_v4\"]\n    model = InceptionV4(num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
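inception_v4 has the same factory interface but, unlike inception_v3, no auxiliary head, so it always returns a single logits tensor. The snippet below is a sketch; the 299x299 input size is assumed as the typical Inception resolution, and drop_rate reaches InceptionV4 through **kwargs.

import numpy as np
import mindspore as ms

from mindcv.models.inceptionv4 import inception_v4

net = inception_v4(pretrained=False, num_classes=1000, drop_rate=0.2)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 299, 299), ms.float32)
print(net(x).shape)  # (1, 1000)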
"},{"location":"reference/models/#mixnet","title":"mixnet","text":""},{"location":"reference/models/#mindcv.models.mixnet.MixNet","title":"mindcv.models.mixnet.MixNet","text":"

Bases: nn.Cell

MixNet model class, based on \"MixConv: Mixed Depthwise Convolutional Kernels\" (https://arxiv.org/abs/1907.09595).

PARAMETER DESCRIPTION arch

size of the architecture, one of \"small\", \"medium\" or \"large\". Default: \"small\".

TYPE: str DEFAULT: 'small'

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of the channels of the input. Default: 3.

TYPE: int DEFAULT: 3

feature_size

number of channels of the output features. Default: 1536.

TYPE: int DEFAULT: 1536

drop_rate

rate of dropout for classifier. Default: 0.2.

TYPE: float DEFAULT: 0.2

depth_multiplier

expansion coefficient of channels. Default: 1.0.

TYPE: float DEFAULT: 1.0

Source code in mindcv/models/mixnet.py
class MixNet(nn.Cell):\nr\"\"\"MixNet model class, based on\n    `\"MixConv: Mixed Depthwise Convolutional Kernels\" <https://arxiv.org/abs/1907.09595>`_\n\n    Args:\n        arch: size of the architecture. \"small\", \"medium\" or \"large\". Default: \"small\".\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number of the channels of the input. Default: 3.\n        feature_size: numbet of the channels of the output features. Default: 1536.\n        drop_rate: rate of dropout for classifier. Default: 0.2.\n        depth_multiplier: expansion coefficient of channels. Default: 1.0.\n    \"\"\"\n\n    def __init__(\n        self,\n        arch: str = \"small\",\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        feature_size: int = 1536,\n        drop_rate: float = 0.2,\n        depth_multiplier: float = 1.0\n    ) -> None:\n        super(MixNet, self).__init__()\n        if arch == \"small\":\n            block_configs = [\n                [16, 16, [3], [1], [1], 1, 1, \"ReLU\", 0.0],\n                [16, 24, [3], [1, 1], [1, 1], 2, 6, \"ReLU\", 0.0],\n                [24, 24, [3], [1, 1], [1, 1], 1, 3, \"ReLU\", 0.0],\n                [24, 40, [3, 5, 7], [1], [1], 2, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 80, [3, 5, 7], [1], [1, 1], 2, 6, \"Swish\", 0.25],\n                [80, 80, [3, 5], [1], [1, 1], 1, 6, \"Swish\", 0.25],\n                [80, 80, [3, 5], [1], [1, 1], 1, 6, \"Swish\", 0.25],\n                [80, 120, [3, 5, 7], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, \"Swish\", 0.5],\n                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, \"Swish\", 0.5],\n                [120, 200, [3, 5, 7, 9, 11], [1], [1], 2, 6, \"Swish\", 0.5],\n                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, \"Swish\", 0.5]\n            ]\n            stem_channels = 16\n            drop_rate = drop_rate\n        else:\n            block_configs = [\n                [24, 24, [3], [1], [1], 1, 1, \"ReLU\", 0.0],\n                [24, 32, [3, 5, 7], [1, 1], [1, 1], 2, 6, \"ReLU\", 0.0],\n                [32, 32, [3], [1, 1], [1, 1], 1, 3, \"ReLU\", 0.0],\n                [32, 40, [3, 5, 7, 9], [1], [1], 2, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [40, 80, [3, 5, 7], [1], [1], 2, 6, \"Swish\", 0.25],\n                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, \"Swish\", 0.25],\n                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, \"Swish\", 0.25],\n                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, \"Swish\", 0.25],\n                [80, 120, [3], [1], [1], 1, 6, \"Swish\", 0.5],\n                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, \"Swish\", 0.5],\n                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, \"Swish\", 0.5],\n                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, \"Swish\", 0.5],\n                [120, 200, [3, 5, 7, 9], [1], [1], 2, 6, \"Swish\", 0.5],\n                [200, 200, [3, 5, 7, 9], [1], [1, 
1], 1, 6, \"Swish\", 0.5],\n                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, \"Swish\", 0.5],\n                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, \"Swish\", 0.5]\n            ]\n            if arch == \"medium\":\n                stem_channels = 24\n                drop_rate = drop_rate\n            elif arch == \"large\":\n                stem_channels = 24\n                depth_multiplier *= 1.3\n                drop_rate = drop_rate\n            else:\n                raise ValueError(f\"Unsupported model type {arch}\")\n\n        if depth_multiplier != 1.0:\n            stem_channels = _roundchannels(stem_channels * depth_multiplier)\n\n            for i, conf in enumerate(block_configs):\n                conf_ls = list(conf)\n                conf_ls[0] = _roundchannels(conf_ls[0] * depth_multiplier)\n                conf_ls[1] = _roundchannels(conf_ls[1] * depth_multiplier)\n                block_configs[i] = tuple(conf_ls)\n\n        # stem convolution\n        self.stem_conv = nn.SequentialCell([\n            nn.Conv2d(in_channels, stem_channels, 3, stride=2, pad_mode=\"pad\", padding=1),\n            nn.BatchNorm2d(stem_channels),\n            nn.ReLU()\n        ])\n\n        # building MixNet blocks\n        layers = []\n        for inc, outc, k, ek, pk, s, er, ac, se in block_configs:\n            layers.append(MixNetBlock(\n                inc,\n                outc,\n                kernel_size=k,\n                expand_ksize=ek,\n                project_ksize=pk,\n                stride=s,\n                expand_ratio=er,\n                activation=ac,\n                se_ratio=se\n            ))\n        self.layers = nn.SequentialCell(layers)\n\n        # head\n        self.head_conv = nn.SequentialCell([\n            nn.Conv2d(block_configs[-1][1], feature_size, 1, pad_mode=\"pad\", padding=0),\n            nn.BatchNorm2d(feature_size),\n            nn.ReLU()\n        ])\n\n        self.pool = GlobalAvgPooling()\n        self.dropout = Dropout(p=drop_rate)\n        self.classifier = nn.Dense(feature_size, num_classes)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                cell.weight.set_data(\n                    init.initializer(init.Normal(math.sqrt(2.0 / fan_out)),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Uniform(1.0 / math.sqrt(cell.weight.shape[0])),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.stem_conv(x)\n        x = self.layers(x)\n        x = 
self.head_conv(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.mixnet.mixnet_l","title":"mindcv.models.mixnet.mixnet_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mixnet.py
@register_model\ndef mixnet_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"mixnet_l\"]\n    model = MixNet(arch=\"large\", in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mixnet.mixnet_m","title":"mindcv.models.mixnet.mixnet_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mixnet.py
@register_model\ndef mixnet_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"mixnet_m\"]\n    model = MixNet(arch=\"medium\", in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mixnet.mixnet_s","title":"mindcv.models.mixnet.mixnet_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mixnet.py
@register_model\ndef mixnet_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"mixnet_s\"]\n    model = MixNet(arch=\"small\", in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
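mixnet_s, mixnet_m and mixnet_l preset arch to \"small\", \"medium\" and \"large\" respectively on MixNet, so the sketch below for the small variant applies to all three. It is illustrative only; extra keyword arguments such as drop_rate are forwarded to the MixNet constructor, and 224x224 is an assumed input size.

import numpy as np
import mindspore as ms

from mindcv.models.mixnet import mixnet_s

# arch="small" is preset by mixnet_s; drop_rate passes through **kwargs to MixNet.
net = mixnet_s(pretrained=False, num_classes=1000, drop_rate=0.2)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)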
"},{"location":"reference/models/#mlpmixer","title":"mlpmixer","text":""},{"location":"reference/models/#mindcv.models.mlpmixer.MLPMixer","title":"mindcv.models.mlpmixer.MLPMixer","text":"

Bases: nn.Cell

MLP-Mixer model class, based on \"MLP-Mixer: An all-MLP Architecture for Vision\" (https://arxiv.org/abs/2105.01601).

PARAMETER DESCRIPTION depth

number of MixerBlocks.

TYPE: int

patch_size

size of a single image patch.

TYPE: int or tuple

n_patches

number of patches.

TYPE: int

n_channels

channels (dimension) of a single embedded patch.

TYPE: int

token_dim

hidden dim of token-mixing MLP.

TYPE: int

channel_dim

hidden dim of channel-mixing MLP.

TYPE: int

num_classes

number of classification classes.

TYPE: int DEFAULT: 1000

in_channels

number of channels of the input. Default: 3.

DEFAULT: 3

Source code in mindcv/models/mlpmixer.py
class MLPMixer(nn.Cell):\nr\"\"\"MLP-Mixer model class, based on\n    `\"MLP-Mixer: An all-MLP Architecture for Vision\" <https://arxiv.org/abs/2105.01601>`_\n\n    Args:\n        depth (int) : number of MixerBlocks.\n        patch_size (int or tuple) : size of a single image patch.\n        n_patches (int) : number of patches.\n        n_channels (int) : channels(dimension) of a single embedded patch.\n        token_dim (int) : hidden dim of token-mixing MLP.\n        channel_dim (int) : hidden dim of channel-mixing MLP.\n        num_classes (int) : number of classification classes.\n        in_channels: number the channels of the input. Default: 3.\n    \"\"\"\n\n    def __init__(self, depth, patch_size, n_patches, n_channels, token_dim, channel_dim, num_classes=1000,\n                 in_channels=3):\n        super().__init__()\n        self.n_patches = n_patches\n        self.n_channels = n_channels\n        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.\n        self.to_patch_embedding = nn.SequentialCell(\n            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode=\"pad\", padding=0),\n            TransPose(permutation=(0, 2, 1), embedding=True),\n        )\n        self.mixer_blocks = nn.SequentialCell()\n        for _ in range(depth):\n            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))\n        self.layer_norm = nn.LayerNorm((n_channels,))\n        self.mlp_head = nn.Dense(n_channels, num_classes)\n        self.mean = ops.ReduceMean()\n        self._initialize_weights()\n\n    def construct(self, x):\n        x = self.to_patch_embedding(x)\n        x = self.mixer_blocks(x)\n        x = self.layer_norm(x)\n        x = self.mean(x, 1)\n        return self.mlp_head(x)\n\n    def _initialize_weights(self):\n        # todo: implement weights init\n        pass\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_b_p16","title":"mindcv.models.mlpmixer.mlp_mixer_b_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_b_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 12, 16, 196, 768, 384, 3072\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_b_p16\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_b_p32","title":"mindcv.models.mlpmixer.mlp_mixer_b_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_b_p32\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_h_p14","title":"mindcv.models.mlpmixer.mlp_mixer_h_p14(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_h_p14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 32, 14, 256, 1280, 640, 5120\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_h_p14\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_l_p16","title":"mindcv.models.mlpmixer.mlp_mixer_l_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_l_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 24, 16, 196, 1024, 512, 4096\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_l_p16\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_l_p32","title":"mindcv.models.mlpmixer.mlp_mixer_l_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_l_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 24, 32, 49, 1024, 512, 4096\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_l_p32\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_s_p16","title":"mindcv.models.mlpmixer.mlp_mixer_s_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,\n                     num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_s_p16\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mlpmixer.mlp_mixer_s_p32","title":"mindcv.models.mlpmixer.mlp_mixer_s_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mlpmixer.py
@register_model\ndef mlp_mixer_s_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    # number_of_layers, patch_resolution, length_of_sequence, hidden_size, mpl_dim_sequence, mpl_dim_channel\n    nl, pr, ls, hs, ds, dc = 8, 32, 49, 512, 256, 2048\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs,\n                     token_dim=ds, channel_dim=dc, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    default_cfg = default_cfgs[\"mlp_mixer_s_p32\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mnasnet","title":"mnasnet","text":""},{"location":"reference/models/#mindcv.models.mnasnet.Mnasnet","title":"mindcv.models.mnasnet.Mnasnet","text":"

Bases: nn.Cell

MnasNet model architecture from \"MnasNet: Platform-Aware Neural Architecture Search for Mobile\" <https://arxiv.org/abs/1807.11626>.

PARAMETER DESCRIPTION alpha

scale factor of model width.

TYPE: float

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

drop_rate

dropout rate of the layer before the main classifier. Default: 0.2.

TYPE: float DEFAULT: 0.2

Source code in mindcv/models/mnasnet.py
class Mnasnet(nn.Cell):\nr\"\"\"MnasNet model architecture from\n    `\"MnasNet: Platform-Aware Neural Architecture Search for Mobile\" <https://arxiv.org/abs/1807.11626>`_.\n\n    Args:\n        alpha: scale factor of model width.\n        in_channels: number the channels of the input. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha: float,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n        drop_rate: float = 0.2,\n    ):\n        super().__init__()\n\n        inverted_residual_setting = [\n            # t, c, n, s, k\n            [3, 24, 3, 2, 3],  # -> 56x56\n            [3, 40, 3, 2, 5],  # -> 28x28\n            [6, 80, 3, 2, 5],  # -> 14x14\n            [6, 96, 2, 1, 3],  # -> 14x14\n            [6, 192, 4, 2, 5],  # -> 7x7\n            [6, 320, 1, 1, 3],  # -> 7x7\n        ]\n\n        mid_channels = make_divisible(32 * alpha, 8)\n        input_channels = make_divisible(16 * alpha, 8)\n\n        features: List[nn.Cell] = [\n            nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=2, pad_mode=\"pad\", padding=1),\n            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),\n            nn.ReLU(),\n            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, pad_mode=\"pad\", padding=1,\n                      group=mid_channels),\n            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),\n            nn.ReLU(),\n            nn.Conv2d(mid_channels, input_channels, kernel_size=1, stride=1),\n            nn.BatchNorm2d(input_channels, momentum=0.99, eps=1e-3),\n        ]\n\n        for t, c, n, s, k in inverted_residual_setting:\n            output_channels = make_divisible(c * alpha, 8)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(InvertedResidual(input_channels, output_channels,\n                                                 stride=stride, kernel_size=k, expand_ratio=t))\n                input_channels = output_channels\n\n        features.extend([\n            nn.Conv2d(input_channels, 1280, kernel_size=1, stride=1),\n            nn.BatchNorm2d(1280, momentum=0.99, eps=1e-3),\n            nn.ReLU(),\n        ])\n        self.features = nn.SequentialCell(features)\n        self.pool = GlobalAvgPooling()\n        self.dropout = Dropout(p=drop_rate)\n        self.classifier = nn.Dense(1280, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(mode=\"fan_out\", 
nonlinearity=\"sigmoid\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.mnasnet.mnasnet_050","title":"mindcv.models.mnasnet.mnasnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MnasNet model with width scaled by 0.5. Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model\ndef mnasnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:\n\"\"\"Get MnasNet model with width scaled by 0.5.\n    Refer to the base class `models.Mnasnet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"mnasnet_050\"]\n    model = Mnasnet(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mnasnet.mnasnet_075","title":"mindcv.models.mnasnet.mnasnet_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MnasNet model with width scaled by 0.75. Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model\ndef mnasnet_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:\n\"\"\"Get MnasNet model with width scaled by 0.75.\n    Refer to the base class `models.Mnasnet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"mnasnet_075\"]\n    model = Mnasnet(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mnasnet.mnasnet_100","title":"mindcv.models.mnasnet.mnasnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MnasNet model with width scaled by 1.0. Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model\ndef mnasnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:\n\"\"\"Get MnasNet model with width scaled by 1.0.\n    Refer to the base class `models.Mnasnet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"mnasnet_100\"]\n    model = Mnasnet(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mnasnet.mnasnet_130","title":"mindcv.models.mnasnet.mnasnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MnasNet model with width scaled by 1.3. Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model\ndef mnasnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:\n\"\"\"Get MnasNet model with width scaled by 1.3.\n    Refer to the base class `models.Mnasnet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"mnasnet_130\"]\n    model = Mnasnet(alpha=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mnasnet.mnasnet_140","title":"mindcv.models.mnasnet.mnasnet_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MnasNet model with width scaled by 1.4. Refer to the base class models.Mnasnet for more details.

Source code in mindcv/models/mnasnet.py
@register_model\ndef mnasnet_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:\n\"\"\"Get MnasNet model with width scaled by 1.4.\n    Refer to the base class `models.Mnasnet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"mnasnet_140\"]\n    model = Mnasnet(alpha=1.4, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mobilenetv1","title":"mobilenetv1","text":""},{"location":"reference/models/#mindcv.models.mobilenetv1.MobileNetV1","title":"mindcv.models.mobilenetv1.MobileNetV1","text":"

Bases: nn.Cell

MobileNetV1 model class, based on \"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications\" <https://arxiv.org/abs/1704.04861>

PARAMETER DESCRIPTION alpha

scale factor of model width. Default: 1.

TYPE: float DEFAULT: 1.0

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/mobilenetv1.py
class MobileNetV1(nn.Cell):\nr\"\"\"MobileNetV1 model class, based on\n    `\"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications\" <https://arxiv.org/abs/1704.04861>`_  # noqa: E501\n\n    Args:\n        alpha: scale factor of model width. Default: 1.\n        in_channels: number the channels of the input. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha: float = 1.0,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n    ) -> None:\n        super().__init__()\n        input_channels = int(32 * alpha)\n        # Setting of depth-wise separable conv\n        # c: number of output channel\n        # s: stride of depth-wise conv\n        block_setting = [\n            # c, s\n            [64, 1],\n            [128, 2],\n            [128, 1],\n            [256, 2],\n            [256, 1],\n            [512, 2],\n            [512, 1],\n            [512, 1],\n            [512, 1],\n            [512, 1],\n            [512, 1],\n            [1024, 2],\n            [1024, 1],\n        ]\n\n        features = [\n            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode=\"pad\", padding=1, has_bias=False),\n            nn.BatchNorm2d(input_channels),\n            nn.ReLU(),\n        ]\n        for c, s in block_setting:\n            output_channel = int(c * alpha)\n            features.append(depthwise_separable_conv(input_channels, output_channel, s))\n            input_channels = output_channel\n        self.features = nn.SequentialCell(features)\n\n        self.pool = GlobalAvgPooling()\n        self.classifier = nn.Dense(input_channels, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.TruncatedNormal(), cell.weight.shape, cell.weight.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
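The width multiplier alpha is the only architectural knob here: every output channel in block_setting is scaled by it. A small sketch building the 0.5x variant directly, equivalent to what mobilenet_v1_050 below does without pretrained weights (the 224x224 input resolution is an assumption):

# alpha scales every channel count; 0.5 halves the network width.
import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv1 import MobileNetV1

net = MobileNetV1(alpha=0.5, in_channels=3, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)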
"},{"location":"reference/models/#mindcv.models.mobilenetv1.mobilenet_v1_025","title":"mindcv.models.mobilenetv1.mobilenet_v1_025(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV1 model with width scaled by 0.25. Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py
@register_model\ndef mobilenet_v1_025(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:\n\"\"\"Get MobileNetV1 model with width scaled by 0.25.\n    Refer to the base class `models.MobileNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v1_025\"]\n    model = MobileNetV1(alpha=0.25, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv1.mobilenet_v1_050","title":"mindcv.models.mobilenetv1.mobilenet_v1_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV1 model with width scaled by 0.5. Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py
@register_model\ndef mobilenet_v1_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:\n\"\"\"Get MobileNetV1 model with width scaled by 0.5.\n    Refer to the base class `models.MobileNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v1_050\"]\n    model = MobileNetV1(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv1.mobilenet_v1_075","title":"mindcv.models.mobilenetv1.mobilenet_v1_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV1 model with width scaled by 0.75. Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py
@register_model\ndef mobilenet_v1_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:\n\"\"\"Get MobileNetV1 model with width scaled by 0.75.\n    Refer to the base class `models.MobileNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v1_075\"]\n    model = MobileNetV1(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv1.mobilenet_v1_100","title":"mindcv.models.mobilenetv1.mobilenet_v1_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV1 model without width scaling. Refer to the base class models.MobileNetV1 for more details.

Source code in mindcv/models/mobilenetv1.py
@register_model\ndef mobilenet_v1_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:\n\"\"\"Get MobileNetV1 model without width scaling.\n    Refer to the base class `models.MobileNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v1_100\"]\n    model = MobileNetV1(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mobilenetv2","title":"mobilenetv2","text":""},{"location":"reference/models/#mindcv.models.mobilenetv2.MobileNetV2","title":"mindcv.models.mobilenetv2.MobileNetV2","text":"

Bases: nn.Cell

MobileNetV2 model class, based on \"MobileNetV2: Inverted Residuals and Linear Bottlenecks\" <https://arxiv.org/abs/1801.04381>

PARAMETER DESCRIPTION alpha

scale factor of model width. Default: 1.

TYPE: float DEFAULT: 1.0

round_nearest

divisor used by the make_divisible function. Default: 8.

TYPE: int DEFAULT: 8

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/mobilenetv2.py
class MobileNetV2(nn.Cell):\nr\"\"\"MobileNetV2 model class, based on\n    `\"MobileNetV2: Inverted Residuals and Linear Bottlenecks\" <https://arxiv.org/abs/1801.04381>`_\n\n    Args:\n        alpha: scale factor of model width. Default: 1.\n        round_nearest: divisor of make divisible function. Default: 8.\n        in_channels: number the channels of the input. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        alpha: float = 1.0,\n        round_nearest: int = 8,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n    ) -> None:\n        super().__init__()\n        input_channels = make_divisible(32 * alpha, round_nearest)\n        # Setting of inverted residual blocks.\n        # t: The expansion factor.\n        # c: Number of output channel.\n        # n: Number of block.\n        # s: First block stride.\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n        last_channels = make_divisible(1280 * max(1.0, alpha), round_nearest)\n\n        # Building stem conv layer.\n        features = [\n            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode=\"pad\", padding=1, has_bias=False),\n            nn.BatchNorm2d(input_channels),\n            nn.ReLU6(),\n        ]\n        # Building inverted residual blocks.\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(InvertedResidual(input_channels, output_channel, stride, expand_ratio=t))\n                input_channels = output_channel\n        # Building last point-wise layers.\n        features.extend([\n            nn.Conv2d(input_channels, last_channels, 1, 1, pad_mode=\"pad\", padding=0, has_bias=False),\n            nn.BatchNorm2d(last_channels),\n            nn.ReLU6(),\n        ])\n        self.features = nn.SequentialCell(features)\n\n        self.pool = GlobalAvgPooling()\n        self.classifier = nn.SequentialCell([\n            Dropout(p=0.2),  # confirmed by paper authors\n            nn.Dense(last_channels, num_classes),\n        ])\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                cell.weight.set_data(\n                    init.initializer(init.Normal(sigma=math.sqrt(2. 
/ n), mean=0.0),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
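Because the MobileNetV2 variants below differ only in alpha and intended input resolution, the registry is usually a more convenient entry point than the class itself. A sketch, assuming the package-level helper mindcv.create_model resolves the names registered in this module:

# Registry-based creation; "mobilenet_v2_100" is the alpha=1.0, 224x224 entry below.
import numpy as np
import mindspore as ms
import mindcv

net = mindcv.create_model("mobilenet_v2_100", pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)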
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_035_128","title":"mindcv.models.mobilenetv2.mobilenet_v2_035_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.35 and input image size of 128. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_035_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_035_128\"]\n    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_035_160","title":"mindcv.models.mobilenetv2.mobilenet_v2_035_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.35 and input image size of 160. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_035_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_035_160\"]\n    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_035_192","title":"mindcv.models.mobilenetv2.mobilenet_v2_035_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.35 and input image size of 192. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_035_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_035_192\"]\n    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_035_224","title":"mindcv.models.mobilenetv2.mobilenet_v2_035_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.35 and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_035_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_035_224\"]\n    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_035_96","title":"mindcv.models.mobilenetv2.mobilenet_v2_035_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.35 and input image size of 96. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_035_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_035_96\"]\n    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_050_128","title":"mindcv.models.mobilenetv2.mobilenet_v2_050_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.5 and input image size of 128. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_050_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.5 and input image size of 128.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_050_128\"]\n    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_050_160","title":"mindcv.models.mobilenetv2.mobilenet_v2_050_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.5 and input image size of 160. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_050_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.5 and input image size of 160.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_050_160\"]\n    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_050_192","title":"mindcv.models.mobilenetv2.mobilenet_v2_050_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.5 and input image size of 192. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_050_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.5 and input image size of 192.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_050_192\"]\n    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_050_224","title":"mindcv.models.mobilenetv2.mobilenet_v2_050_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.5 and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_050_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.5 and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_050_224\"]\n    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_050_96","title":"mindcv.models.mobilenetv2.mobilenet_v2_050_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.5 and input image size of 96. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_050_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.5 and input image size of 96.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_050_96\"]\n    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_075","title":"mindcv.models.mobilenetv2.mobilenet_v2_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.75 and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.75 and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_075\"]\n    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_075_128","title":"mindcv.models.mobilenetv2.mobilenet_v2_075_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.75 and input image size of 128. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_075_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.75 and input image size of 128.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_075_128\"]\n    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_075_160","title":"mindcv.models.mobilenetv2.mobilenet_v2_075_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.75 and input image size of 160. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_075_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.75 and input image size of 160.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_075_160\"]\n    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_075_192","title":"mindcv.models.mobilenetv2.mobilenet_v2_075_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.75 and input image size of 192. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_075_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.75 and input image size of 192.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_075_192\"]\n    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_075_96","title":"mindcv.models.mobilenetv2.mobilenet_v2_075_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 0.75 and input image size of 96. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_075_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 0.75 and input image size of 96.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_075_96\"]\n    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_100","title":"mindcv.models.mobilenetv2.mobilenet_v2_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model without width scaling and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model without width scaling and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_100\"]\n    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_100_128","title":"mindcv.models.mobilenetv2.mobilenet_v2_100_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model without width scaling and input image size of 128. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_100_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model without width scaling and input image size of 128.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_100_128\"]\n    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_100_160","title":"mindcv.models.mobilenetv2.mobilenet_v2_100_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model without width scaling and input image size of 160. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_100_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model without width scaling and input image size of 160.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_100_160\"]\n    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_100_192","title":"mindcv.models.mobilenetv2.mobilenet_v2_100_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model without width scaling and input image size of 192. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_100_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model without width scaling and input image size of 192.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_100_192\"]\n    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_100_96","title":"mindcv.models.mobilenetv2.mobilenet_v2_100_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model without width scaling and input image size of 96. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_100_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model without width scaling and input image size of 96.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_100_96\"]\n    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_130_224","title":"mindcv.models.mobilenetv2.mobilenet_v2_130_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 1.3 and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_130_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 1.3 and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_130_224\"]\n    model = MobileNetV2(alpha=1.3, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilenetv2.mobilenet_v2_140","title":"mindcv.models.mobilenetv2.mobilenet_v2_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get MobileNetV2 model with width scaled by 1.4 and input image size of 224. Refer to the base class models.MobileNetV2 for more details.

Source code in mindcv/models/mobilenetv2.py
@register_model\ndef mobilenet_v2_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:\n\"\"\"Get MobileNetV2 model with width scaled by 1.4 and input image size of 224.\n    Refer to the base class `models.MobileNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v2_140\"]\n    model = MobileNetV2(alpha=1.4, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mobilenetv3","title":"mobilenetv3","text":""},{"location":"reference/models/#mindcv.models.mobilenetv3.MobileNetV3","title":"mindcv.models.mobilenetv3.MobileNetV3","text":"

Bases: nn.Cell

MobileNetV3 model class, based on \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>

PARAMETER DESCRIPTION arch

size of the architecture, either 'small' or 'large'.

TYPE: str

alpha

scale factor of model width. Default: 1.

TYPE: float DEFAULT: 1.0

round_nearest

divisor used by the make_divisible function. Default: 8.

TYPE: int DEFAULT: 8

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/mobilenetv3.py
class MobileNetV3(nn.Cell):\nr\"\"\"MobileNetV3 model class, based on\n    `\"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>`_\n\n    Args:\n        arch: size of the architecture. 'small' or 'large'.\n        alpha: scale factor of model width. Default: 1.\n        round_nearest: divisor of make divisible function. Default: 8.\n        in_channels: number the channels of the input. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        arch: str,\n        alpha: float = 1.0,\n        round_nearest: int = 8,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n    ) -> None:\n        super().__init__()\n        input_channels = make_divisible(16 * alpha, round_nearest)\n        # Setting of bottleneck blocks. ex: [k, e, c, se, nl, s]\n        # k: kernel size of depth-wise conv\n        # e: expansion size\n        # c: number of output channel\n        # se: whether there is a Squeeze-And-Excite in that block\n        # nl: type of non-linearity used\n        # s: stride of depth-wise conv\n        if arch == \"large\":\n            bottleneck_setting = [\n                [3, 16, 16, False, \"relu\", 1],\n                [3, 64, 24, False, \"relu\", 2],\n                [3, 72, 24, False, \"relu\", 1],\n                [5, 72, 40, True, \"relu\", 2],\n                [5, 120, 40, True, \"relu\", 1],\n                [5, 120, 40, True, \"relu\", 1],\n                [3, 240, 80, False, \"hswish\", 2],\n                [3, 200, 80, False, \"hswish\", 1],\n                [3, 184, 80, False, \"hswish\", 1],\n                [3, 184, 80, False, \"hswish\", 1],\n                [3, 480, 112, True, \"hswish\", 1],\n                [3, 672, 112, True, \"hswish\", 1],\n                [5, 672, 160, True, \"hswish\", 2],\n                [5, 960, 160, True, \"hswish\", 1],\n                [5, 960, 160, True, \"hswish\", 1],\n            ]\n            last_channels = make_divisible(alpha * 1280, round_nearest)\n        elif arch == \"small\":\n            bottleneck_setting = [\n                [3, 16, 16, True, \"relu\", 2],\n                [3, 72, 24, False, \"relu\", 2],\n                [3, 88, 24, False, \"relu\", 1],\n                [5, 96, 40, True, \"hswish\", 2],\n                [5, 240, 40, True, \"hswish\", 1],\n                [5, 240, 40, True, \"hswish\", 1],\n                [5, 120, 48, True, \"hswish\", 1],\n                [5, 144, 48, True, \"hswish\", 1],\n                [5, 288, 96, True, \"hswish\", 2],\n                [5, 576, 96, True, \"hswish\", 1],\n                [5, 576, 96, True, \"hswish\", 1],\n            ]\n            last_channels = make_divisible(alpha * 1024, round_nearest)\n        else:\n            raise ValueError(f\"Unsupported model type {arch}\")\n\n        # Building stem conv layer.\n        features = [\n            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode=\"pad\", padding=1, has_bias=False),\n            nn.BatchNorm2d(input_channels),\n            nn.HSwish(),\n        ]\n\n        total_reduction = 2\n        self.feature_info = [dict(chs=input_channels, reduction=total_reduction, name=f'features.{len(features) - 1}')]\n\n        # Building bottleneck blocks.\n        for k, e, c, se, nl, s in bottleneck_setting:\n            exp_channels = make_divisible(alpha * e, round_nearest)\n            output_channels = make_divisible(alpha * c, round_nearest)\n            features.append(Bottleneck(input_channels, 
exp_channels, output_channels,\n                                       kernel_size=k, stride=s, activation=nl, use_se=se))\n            input_channels = output_channels\n\n            total_reduction *= s\n            self.feature_info.append(dict(chs=input_channels, reduction=total_reduction,\n                                          name=f'features.{len(features) - 1}'))\n\n        # Building last point-wise conv layers.\n        output_channels = input_channels * 6\n        features.extend([\n            nn.Conv2d(input_channels, output_channels, 1, 1, pad_mode=\"pad\", padding=0, has_bias=False),\n            nn.BatchNorm2d(output_channels),\n            nn.HSwish(),\n        ])\n\n        self.feature_info.append(dict(chs=output_channels, reduction=total_reduction,\n                                      name=f'features.{len(features) - 1}'))\n        self.flatten_sequential = True\n\n        self.features = nn.SequentialCell(features)\n\n        self.pool = GlobalAvgPooling()\n        self.classifier = nn.SequentialCell([\n            nn.Dense(output_channels, last_channels),\n            nn.HSwish(),\n            Dropout(p=0.2),\n            nn.Dense(last_channels, num_classes),\n        ])\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                cell.weight.set_data(\n                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
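The arch argument selects one of the two bottleneck tables above, and alpha rescales its channel counts. A minimal sketch building the small variant directly; mobilenet_v3_small_100 below routes through _create_mobilenet_v3 to do the same plus pretrained-weight handling. The 224x224 input resolution is an assumption:

# arch="small" selects the small bottleneck table; alpha=1.0 keeps channels unscaled.
import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv3 import MobileNetV3

net = MobileNetV3(arch="small", alpha=1.0, in_channels=3, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (1, 1000)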
"},{"location":"reference/models/#mindcv.models.mobilenetv3.mobilenet_v3_large_075","title":"mindcv.models.mobilenetv3.mobilenet_v3_large_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get large MobileNetV3 model with width scaled by 0.75. Refer to the base class models.MobileNetV3 for more details.

Source code in mindcv/models/mobilenetv3.py
@register_model\ndef mobilenet_v3_large_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:\n\"\"\"Get large MobileNetV3 model with width scaled by 0.75.\n    Refer to the base class `models.MobileNetV3` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v3_large_075\"]\n    model_args = dict(arch=\"large\", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.mobilenetv3.mobilenet_v3_large_100","title":"mindcv.models.mobilenetv3.mobilenet_v3_large_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get large MobileNetV3 model without width scaling. Refer to the base class models.MobileNetV3 for more details.

Source code in mindcv/models/mobilenetv3.py
@register_model\ndef mobilenet_v3_large_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:\n\"\"\"Get large MobileNetV3 model without width scaling.\n    Refer to the base class `models.MobileNetV3` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v3_large_100\"]\n    model_args = dict(arch=\"large\", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.mobilenetv3.mobilenet_v3_small_075","title":"mindcv.models.mobilenetv3.mobilenet_v3_small_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get small MobileNetV3 model with width scaled by 0.75. Refer to the base class models.MobileNetV3 for more details.

Source code in mindcv/models/mobilenetv3.py
@register_model\ndef mobilenet_v3_small_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:\n\"\"\"Get small MobileNetV3 model with width scaled by 0.75.\n    Refer to the base class `models.MobileNetV3` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v3_small_075\"]\n    model_args = dict(arch=\"small\", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.mobilenetv3.mobilenet_v3_small_100","title":"mindcv.models.mobilenetv3.mobilenet_v3_small_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get small MobileNetV3 model without width scaling. Refer to the base class models.MobileNetV3 for more details.

Source code in mindcv/models/mobilenetv3.py
@register_model\ndef mobilenet_v3_small_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:\n\"\"\"Get small MobileNetV3 model without width scaling.\n    Refer to the base class `models.MobileNetV3` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"mobilenet_v3_small_100\"]\n    model_args = dict(arch=\"small\", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)\n    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mobilevit","title":"mobilevit","text":""},{"location":"reference/models/#mindcv.models.mobilevit.mobilevit_small","title":"mindcv.models.mobilevit.mobilevit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mobilevit.py
@register_model\ndef mobilevit_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:\n    config = get_config(\"small\")\n    model = MobileViT(config, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"mobilevit_small\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilevit.mobilevit_x_small","title":"mindcv.models.mobilevit.mobilevit_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mobilevit.py
@register_model\ndef mobilevit_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:\n    config = get_config(\"x_small\")\n    model = MobileViT(config, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"mobilevit_x_small\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.mobilevit.mobilevit_xx_small","title":"mindcv.models.mobilevit.mobilevit_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/mobilevit.py
@register_model\ndef mobilevit_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:\n    config = get_config(\"xx_small\")\n    model = MobileViT(config, num_classes=num_classes, **kwargs)\n    default_cfg = default_cfgs[\"mobilevit_xx_small\"]\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
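
Example (not part of the library source): a sketch comparing the three registered MobileViT variants by parameter count; the factory names are taken from the source above.

import numpy as np\nfrom mindcv.models.mobilevit import mobilevit_small, mobilevit_x_small, mobilevit_xx_small\n\n# Instantiate each variant and count its trainable parameters.\nfor factory in (mobilevit_xx_small, mobilevit_x_small, mobilevit_small):\n    model = factory(pretrained=False, num_classes=1000)\n    n_params = sum(int(np.prod(p.shape)) for p in model.trainable_params())\n    print(factory.__name__, f'{n_params / 1e6:.1f}M parameters')\n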
"},{"location":"reference/models/#nasnet","title":"nasnet","text":""},{"location":"reference/models/#mindcv.models.nasnet.NASNetAMobile","title":"mindcv.models.nasnet.NASNetAMobile","text":"

Bases: nn.Cell

NasNet model class, based on \"Learning Transferable Architectures for Scalable Image Recognition\" <https://arxiv.org/pdf/1707.07012v4.pdf>_

PARAMETER DESCRIPTION num_classes

number of classification classes.

TYPE: int DEFAULT: 1000

stem_filters

number of stem filters. Default: 32.

TYPE: int DEFAULT: 32

penultimate_filters

number of penultimate filters. Default: 1056.

TYPE: int DEFAULT: 1056

filters_multiplier

multiplier for the number of filters. Default: 2.

TYPE: int DEFAULT: 2

Source code in mindcv/models/nasnet.py
class NASNetAMobile(nn.Cell):\nr\"\"\"NasNet model class, based on\n    `\"Learning Transferable Architectures for Scalable Image Recognition\" <https://arxiv.org/pdf/1707.07012v4.pdf>`_\n    Args:\n        num_classes: number of classification classes.\n        stem_filters: number of stem filters. Default: 32.\n        penultimate_filters: number of penultimate filters. Default: 1056.\n        filters_multiplier: size of filters multiplier. Default: 2.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n        stem_filters: int = 32,\n        penultimate_filters: int = 1056,\n        filters_multiplier: int = 2,\n    ) -> None:\n        super().__init__()\n        self.stem_filters = stem_filters\n        self.penultimate_filters = penultimate_filters\n        self.filters_multiplier = filters_multiplier\n\n        filters = self.penultimate_filters // 24\n        # 24 is default value for the architecture\n\n        self.conv0 = nn.SequentialCell([\n            nn.Conv2d(in_channels=in_channels, out_channels=self.stem_filters, kernel_size=3, stride=2, pad_mode=\"pad\",\n                      padding=0,\n                      has_bias=False),\n            nn.BatchNorm2d(num_features=self.stem_filters, eps=0.001, momentum=0.9, affine=True)\n        ])\n\n        self.cell_stem_0 = CellStem0(\n            self.stem_filters, num_filters=filters // (filters_multiplier ** 2)\n        )\n        self.cell_stem_1 = CellStem1(\n            self.stem_filters, num_filters=filters // filters_multiplier\n        )\n\n        self.cell_0 = FirstCell(\n            in_channels_left=filters,\n            out_channels_left=filters // 2,  # 1, 0.5\n            in_channels_right=2 * filters,\n            out_channels_right=filters,\n        )  # 2, 1\n        self.cell_1 = NormalCell(\n            in_channels_left=2 * filters,\n            out_channels_left=filters,  # 2, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters,\n        )  # 6, 1\n        self.cell_2 = NormalCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters,  # 6, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters,\n        )  # 6, 1\n        self.cell_3 = NormalCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters,  # 6, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters,\n        )  # 6, 1\n\n        self.reduction_cell_0 = ReductionCell0(\n            in_channels_left=6 * filters,\n            out_channels_left=2 * filters,  # 6, 2\n            in_channels_right=6 * filters,\n            out_channels_right=2 * filters,\n        )  # 6, 2\n\n        self.cell_6 = FirstCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters,  # 6, 1\n            in_channels_right=8 * filters,\n            out_channels_right=2 * filters,\n        )  # 8, 2\n        self.cell_7 = NormalCell(\n            in_channels_left=8 * filters,\n            out_channels_left=2 * filters,  # 8, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters,\n        )  # 12, 2\n        self.cell_8 = NormalCell(\n            in_channels_left=12 * filters,\n            out_channels_left=2 * filters,  # 12, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters,\n        )  # 12, 2\n        self.cell_9 = NormalCell(\n            
in_channels_left=12 * filters,\n            out_channels_left=2 * filters,  # 12, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters,\n        )  # 12, 2\n\n        self.reduction_cell_1 = ReductionCell1(\n            in_channels_left=12 * filters,\n            out_channels_left=4 * filters,  # 12, 4\n            in_channels_right=12 * filters,\n            out_channels_right=4 * filters,\n        )  # 12, 4\n\n        self.cell_12 = FirstCell(\n            in_channels_left=12 * filters,\n            out_channels_left=2 * filters,  # 12, 2\n            in_channels_right=16 * filters,\n            out_channels_right=4 * filters,\n        )  # 16, 4\n        self.cell_13 = NormalCell(\n            in_channels_left=16 * filters,\n            out_channels_left=4 * filters,  # 16, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters,\n        )  # 24, 4\n        self.cell_14 = NormalCell(\n            in_channels_left=24 * filters,\n            out_channels_left=4 * filters,  # 24, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters,\n        )  # 24, 4\n        self.cell_15 = NormalCell(\n            in_channels_left=24 * filters,\n            out_channels_left=4 * filters,  # 24, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters,\n        )  # 24, 4\n\n        self.relu = nn.ReLU()\n        self.dropout = Dropout(p=0.5)\n        self.classifier = nn.Dense(in_channels=24 * filters, out_channels=num_classes)\n        self.pool = GlobalAvgPooling()\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        self.init_parameters_data()\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                cell.weight.set_data(init.initializer(init.Normal(math.sqrt(2. 
/ n), 0),\n                                                      cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n        x_conv0 = self.conv0(x)\n        x_stem_0 = self.cell_stem_0(x_conv0)\n        x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)\n\n        x_cell_0 = self.cell_0(x_stem_1, x_stem_0)\n        x_cell_1 = self.cell_1(x_cell_0, x_stem_1)\n        x_cell_2 = self.cell_2(x_cell_1, x_cell_0)\n        x_cell_3 = self.cell_3(x_cell_2, x_cell_1)\n\n        x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)\n\n        x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)\n        x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)\n        x_cell_8 = self.cell_8(x_cell_7, x_cell_6)\n        x_cell_9 = self.cell_9(x_cell_8, x_cell_7)\n\n        x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)\n\n        x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)\n        x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)\n        x_cell_14 = self.cell_14(x_cell_13, x_cell_12)\n        x_cell_15 = self.cell_15(x_cell_14, x_cell_13)\n\n        x_cell_15 = self.relu(x_cell_15)\n        return x_cell_15\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)  # global average pool\n        x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.nasnet.NASNetAMobile.forward_features","title":"mindcv.models.nasnet.NASNetAMobile.forward_features(x)","text":"

Network forward feature extraction.

Source code in mindcv/models/nasnet.py
def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n    x_conv0 = self.conv0(x)\n    x_stem_0 = self.cell_stem_0(x_conv0)\n    x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)\n\n    x_cell_0 = self.cell_0(x_stem_1, x_stem_0)\n    x_cell_1 = self.cell_1(x_cell_0, x_stem_1)\n    x_cell_2 = self.cell_2(x_cell_1, x_cell_0)\n    x_cell_3 = self.cell_3(x_cell_2, x_cell_1)\n\n    x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)\n\n    x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)\n    x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)\n    x_cell_8 = self.cell_8(x_cell_7, x_cell_6)\n    x_cell_9 = self.cell_9(x_cell_8, x_cell_7)\n\n    x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)\n\n    x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)\n    x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)\n    x_cell_14 = self.cell_14(x_cell_13, x_cell_12)\n    x_cell_15 = self.cell_15(x_cell_14, x_cell_13)\n\n    x_cell_15 = self.relu(x_cell_15)\n    return x_cell_15\n
"},{"location":"reference/models/#mindcv.models.nasnet.nasnet_a_4x1056","title":"mindcv.models.nasnet.nasnet_a_4x1056(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get NasNet model. Refer to the base class models.NASNetAMobile for more details.

Source code in mindcv/models/nasnet.py
@register_model\ndef nasnet_a_4x1056(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> NASNetAMobile:\n\"\"\"Get NasNet model.\n    Refer to the base class `models.NASNetAMobile` for more details.\"\"\"\n    default_cfg = default_cfgs[\"nasnet_a_4x1056\"]\n    model = NASNetAMobile(in_channels=in_channels, num_classes=num_classes, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
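
Example (not part of the library source): a sketch using forward_features/forward_head from the class above; the 224x224 input size is an assumption.

import numpy as np\nimport mindspore as ms\nfrom mindspore import Tensor\nfrom mindcv.models.nasnet import nasnet_a_4x1056\n\nmodel = nasnet_a_4x1056(pretrained=False, num_classes=1000)\nmodel.set_train(False)  # disable the dropout in forward_head\nx = Tensor(np.random.randn(1, 3, 224, 224), ms.float32)  # 224x224 input is an assumption\nfeat = model.forward_features(x)   # pre-pooling feature map\nlogits = model.forward_head(feat)  # global average pooling + dropout + classifier\nprint(feat.shape, logits.shape)\n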
"},{"location":"reference/models/#pit","title":"pit","text":""},{"location":"reference/models/#mindcv.models.pit.pit_b","title":"mindcv.models.pit.pit_b(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PiT-B model. Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model\ndef pit_b(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:\n\"\"\"Get PiT-B model.\n    Refer to the base class `models.PoolingTransformer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"pit_b\"]\n    model = PoolingTransformer(\n        image_size=224,\n        patch_size=14,\n        stride=7,\n        base_dims=[64, 64, 64],\n        depth=[3, 6, 4],\n        heads=[4, 8, 16],\n        mlp_ratio=4.0,\n        num_classes=num_classes,\n        in_chans=in_channels,\n        **kwargs\n    )\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pit.pit_s","title":"mindcv.models.pit.pit_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PiT-S model. Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model\ndef pit_s(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:\n\"\"\"Get PiT-S model.\n    Refer to the base class `models.PoolingTransformer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"pit_s\"]\n    model = PoolingTransformer(\n        image_size=224,\n        patch_size=16,\n        stride=8,\n        base_dims=[48, 48, 48],\n        depth=[2, 6, 4],\n        heads=[3, 6, 12],\n        mlp_ratio=4.0,\n        num_classes=num_classes,\n        in_chans=in_channels,\n        **kwargs\n    )\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pit.pit_ti","title":"mindcv.models.pit.pit_ti(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PiT-Ti model. Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model\ndef pit_ti(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:\n\"\"\"Get PiT-Ti model.\n    Refer to the base class `models.PoolingTransformer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"pit_ti\"]\n    model = PoolingTransformer(\n        image_size=224,\n        patch_size=16,\n        stride=8,\n        base_dims=[32, 32, 32],\n        depth=[2, 6, 4],\n        heads=[2, 4, 8],\n        mlp_ratio=4.0,\n        num_classes=num_classes,\n        in_chans=in_channels,\n        **kwargs\n    )\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pit.pit_xs","title":"mindcv.models.pit.pit_xs(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PiT-XS model. Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model\ndef pit_xs(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:\n\"\"\"Get PiT-XS model.\n    Refer to the base class `models.PoolingTransformer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"pit_xs\"]\n    model = PoolingTransformer(\n        image_size=224,\n        patch_size=16,\n        stride=8,\n        base_dims=[48, 48, 48],\n        depth=[2, 6, 4],\n        heads=[2, 4, 8],\n        mlp_ratio=4.0,\n        num_classes=num_classes,\n        in_chans=in_channels,\n        **kwargs\n    )\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
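
Example (not part of the library source): the four PiT factories above differ only in base_dims, depth and heads; a minimal forward-pass sketch with the smallest variant (all variants are configured with image_size=224).

import numpy as np\nimport mindspore as ms\nfrom mindspore import Tensor\nfrom mindcv.models.pit import pit_ti\n\nmodel = pit_ti(pretrained=False, num_classes=1000)\nmodel.set_train(False)\nx = Tensor(np.random.randn(1, 3, 224, 224), ms.float32)  # image_size=224 as in the factories above\nprint(model(x).shape)  # (1, 1000)\n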
"},{"location":"reference/models/#poolformer","title":"poolformer","text":""},{"location":"reference/models/#mindcv.models.poolformer.PoolFormer","title":"mindcv.models.poolformer.PoolFormer","text":"

Bases: nn.Cell

PoolFormer model class, based on \"MetaFormer Is Actually What You Need for Vision\" <https://arxiv.org/pdf/2111.11418v3.pdf>_

PARAMETER DESCRIPTION layers

number of blocks for the 4 stages

embed_dims

the embedding dims for the 4 stages. Default: (64, 128, 320, 512)

DEFAULT: (64, 128, 320, 512)

mlp_ratios

mlp ratios for the 4 stages. Default: (4, 4, 4, 4)

DEFAULT: (4, 4, 4, 4)

downsamples

flags to apply downsampling or not. Default: (True, True, True, True)

DEFAULT: (True, True, True, True)

pool_size

the pooling size for the 4 stages. Default: 3

DEFAULT: 3

in_chans

number of input channels. Default: 3

DEFAULT: 3

num_classes

number of classes for the image classification. Default: 1000

DEFAULT: 1000

global_pool

type of the global pooling layer. Default: avg

DEFAULT: 'avg'

norm_layer

type of the normalization layer. Default: nn.GroupNorm

DEFAULT: nn.GroupNorm

act_layer

type of the activation layer. Default: nn.GELU

DEFAULT: nn.GELU

in_patch_size

patch size of the input patch embedding. Default: 7

DEFAULT: 7

in_stride

stride of the input patch embedding. Default: 4.

DEFAULT: 4

in_pad

padding of the input patch embedding. Default: 2.

DEFAULT: 2

down_patch_size

patch size of the downsampling patch embedding. Default: 3.

DEFAULT: 3

down_stride

stride of the downsampling patch embedding. Default: 2.

DEFAULT: 2

down_pad

padding of the downsampling patch embedding. Default: 1.

DEFAULT: 1

drop_rate

dropout rate of the layer before the main classifier. Default: 0.

DEFAULT: 0.0

drop_path_rate

stochastic depth (drop path) rate. Default: 0.

DEFAULT: 0.0

layer_scale_init_value

initial value for LayerScale. Default: 1e-5.

DEFAULT: 1e-05

fork_feat

whether to output the features of the 4 stages (for dense prediction). Default: False.

DEFAULT: False

Source code in mindcv/models/poolformer.py
class PoolFormer(nn.Cell):\nr\"\"\"PoolFormer model class, based on\n    `\"MetaFormer Is Actually What You Need for Vision\" <https://arxiv.org/pdf/2111.11418v3.pdf>`_\n\n    Args:\n        layers: number of blocks for the 4 stages\n        embed_dims: the embedding dims for the 4 stages. Default: (64, 128, 320, 512)\n        mlp_ratios: mlp ratios for the 4 stages. Default: (4, 4, 4, 4)\n        downsamples: flags to apply downsampling or not. Default: (True, True, True, True)\n        pool_size: the pooling size for the 4 stages. Default: 3\n        in_chans: number of input channels. Default: 3\n        num_classes: number of classes for the image classification. Default: 1000\n        global_pool: define the types of pooling layer. Default: avg\n        norm_layer: define the types of normalization. Default: nn.GroupNorm\n        act_layer: define the types of activation. Default: nn.GELU\n        in_patch_size: specify the patch embedding for the input image. Default: 7\n        in_stride: specify the stride for the input image. Default: 4.\n        in_pad: specify the pad for the input image. Default: 2.\n        down_patch_size: specify the downsample. Default: 3.\n        down_stride: specify the downsample (patch embed.). Default: 2.\n        down_pad: specify the downsample (patch embed.). Default: 1.\n        drop_rate: dropout rate of the layer before main classifier. Default: 0.\n        drop_path_rate: Stochastic Depth. Default: 0.\n        layer_scale_init_value: LayerScale. Default: 1e-5.\n        fork_feat: whether output features of the 4 stages, for dense prediction. Default: False.\n    \"\"\"\n\n    def __init__(\n        self,\n        layers,\n        embed_dims=(64, 128, 320, 512),\n        mlp_ratios=(4, 4, 4, 4),\n        downsamples=(True, True, True, True),\n        pool_size=3,\n        in_chans=3,\n        num_classes=1000,\n        global_pool=\"avg\",\n        norm_layer=nn.GroupNorm,\n        act_layer=nn.GELU,\n        in_patch_size=7,\n        in_stride=4,\n        in_pad=2,\n        down_patch_size=3,\n        down_stride=2,\n        down_pad=1,\n        drop_rate=0.0,\n        drop_path_rate=0.0,\n        layer_scale_init_value=1e-5,\n        fork_feat=False,\n    ):\n        super().__init__()\n\n        if not fork_feat:\n            self.num_classes = num_classes\n        self.fork_feat = fork_feat\n\n        self.global_pool = global_pool\n        self.num_features = embed_dims[-1]\n        self.grad_checkpointing = False\n\n        self.patch_embed = PatchEmbed(\n            patch_size=in_patch_size, stride=in_stride, padding=in_pad,\n            in_chs=in_chans, embed_dim=embed_dims[0])\n\n        # set the main block in network\n        network = []\n        for i in range(len(layers)):\n            network.append(basic_blocks(\n                embed_dims[i], i, layers,\n                pool_size=pool_size, mlp_ratio=mlp_ratios[i],\n                act_layer=act_layer, norm_layer=norm_layer,\n                drop_rate=drop_rate, drop_path_rate=drop_path_rate,\n                layer_scale_init_value=layer_scale_init_value)\n            )\n            if i < len(layers) - 1 and (downsamples[i] or embed_dims[i] != embed_dims[i + 1]):\n                # downsampling between stages\n                network.append(PatchEmbed(\n                    in_chs=embed_dims[i], embed_dim=embed_dims[i + 1],\n                    patch_size=down_patch_size, stride=down_stride, padding=down_pad)\n                )\n\n        self.network = 
nn.SequentialCell(*network)\n        self.norm = norm_layer(1, embed_dims[-1])\n        self.head = nn.Dense(embed_dims[-1], num_classes, has_bias=True) if num_classes > 0 else Identity()\n        # self._initialize_weights()\n        self.cls_init_weights()\n\n    def cls_init_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Dense):\n                m.weight.set_data(\n                    init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))\n                if m.bias is not None:\n                    m.bias.set_data(\n                        init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))\n\n    def reset_classifier(self, num_classes, global_pool=None):\n        self.num_classes = num_classes\n        if global_pool is not None:\n            self.global_pool = global_pool\n        self.head = nn.Dense(self.num_features, num_classes) if num_classes > 0 else Identity()\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.patch_embed(x)\n        x = self.network(x)\n        if self.fork_feat:\n            # otuput features of four stages for dense prediction\n            return x\n        x = self.norm(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.head(x.mean([-2, -1]))\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        return self.forward_head(x)\n
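
Example (not part of the library source): a sketch of reset_classifier from the class above, which swaps the classification head for a new number of classes; the 224x224 input size is an assumption.

import numpy as np\nimport mindspore as ms\nfrom mindspore import Tensor\nfrom mindcv.models.poolformer import poolformer_s12\n\nmodel = poolformer_s12(pretrained=False, num_classes=1000)\nmodel.reset_classifier(num_classes=10)  # replaces the head with nn.Dense(num_features, 10)\nmodel.set_train(False)\nx = Tensor(np.random.randn(1, 3, 224, 224), ms.float32)  # 224x224 input is an assumption\nprint(model(x).shape)  # (1, 10)\n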
"},{"location":"reference/models/#mindcv.models.poolformer.PoolFormer.cls_init_weights","title":"mindcv.models.poolformer.PoolFormer.cls_init_weights()","text":"

Initialize weights for cells.

Source code in mindcv/models/poolformer.py
def cls_init_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n    for name, m in self.cells_and_names():\n        if isinstance(m, nn.Dense):\n            m.weight.set_data(\n                init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))\n            if m.bias is not None:\n                m.bias.set_data(\n                    init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))\n
"},{"location":"reference/models/#mindcv.models.poolformer.poolformer_m36","title":"mindcv.models.poolformer.poolformer_m36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get poolformer_m36 model. Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model\ndef poolformer_m36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:\n\"\"\"Get poolformer_m36 model.\n    Refer to the base class `models.PoolFormer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"poolformer_m36\"]\n    layers = (6, 6, 18, 6)\n    embed_dims = (96, 192, 384, 768)\n    model = PoolFormer(\n        in_chans=in_channels,\n        num_classes=num_classes,\n        layers=layers,\n        layer_scale_init_value=1e-6,\n        embed_dims=embed_dims,\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.poolformer.poolformer_m48","title":"mindcv.models.poolformer.poolformer_m48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get poolformer_m48 model. Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model\ndef poolformer_m48(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:\n\"\"\"Get poolformer_m48 model.\n    Refer to the base class `models.PoolFormer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"poolformer_m48\"]\n    layers = (8, 8, 24, 8)\n    embed_dims = (96, 192, 384, 768)\n    model = PoolFormer(\n        in_chans=in_channels,\n        num_classes=num_classes,\n        layers=layers,\n        layer_scale_init_value=1e-6,\n        embed_dims=embed_dims,\n        **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.poolformer.poolformer_s12","title":"mindcv.models.poolformer.poolformer_s12(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get poolformer_s12 model. Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model\ndef poolformer_s12(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:\n\"\"\"Get poolformer_s12 model.\n    Refer to the base class `models.PoolFormer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"poolformer_s12\"]\n    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(2, 2, 6, 2), **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.poolformer.poolformer_s24","title":"mindcv.models.poolformer.poolformer_s24(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get poolformer_s24 model. Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model\ndef poolformer_s24(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:\n\"\"\"Get poolformer_s24 model.\n    Refer to the base class `models.PoolFormer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"poolformer_s24\"]\n    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(4, 4, 12, 4), **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.poolformer.poolformer_s36","title":"mindcv.models.poolformer.poolformer_s36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get poolformer_s36 model. Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model\ndef poolformer_s36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:\n\"\"\"Get poolformer_s36 model.\n    Refer to the base class `models.PoolFormer` for more details.\"\"\"\n    default_cfg = default_cfgs[\"poolformer_s36\"]\n    model = PoolFormer(\n        in_chans=in_channels, num_classes=num_classes, layers=(6, 6, 18, 6), layer_scale_init_value=1e-6, **kwargs\n    )\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#pvt","title":"pvt","text":""},{"location":"reference/models/#mindcv.models.pvt.PyramidVisionTransformer","title":"mindcv.models.pvt.PyramidVisionTransformer","text":"

Bases: nn.Cell

Pyramid Vision Transformer model class, based on \"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions\" <https://arxiv.org/abs/2102.12122>_

PARAMETER DESCRIPTION img_size(int)

size of an input image.

patch_size

size of a single image patch.

TYPE: int DEFAULT: 4

in_chans

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

embed_dims

hidden dims of the patch embedding in each stage.

TYPE: list DEFAULT: [64, 128, 320, 512]

num_heads

number of attention heads in each stage.

TYPE: list DEFAULT: [1, 2, 5, 8]

mlp_ratios

ratios of MLP hidden dims in each stage.

TYPE: list DEFAULT: [8, 8, 4, 4]

qkv_bias(bool)

whether to use bias in attention.

qk_scale(float)

scale applied to qk in attention (if not None); otherwise head_dim ** -0.5 is used.

drop_rate(float)

The drop rate for each block. Default: 0.0.

attn_drop_rate(float)

The drop rate for attention. Default: 0.0.

drop_path_rate(float)

The drop rate for drop path. Default: 0.0.

norm_layer(nn.Cell)

Norm layer that will be used in blocks. Default: nn.LayerNorm.

depths

number of blocks in each stage.

TYPE: list DEFAULT: [2, 2, 2, 2]

sr_ratios(list)

stride and kernel size of the spatial-reduction attention in each stage.

num_stages(int)

number of stages. Default: 4.

Source code in mindcv/models/pvt.py
class PyramidVisionTransformer(nn.Cell):\nr\"\"\"Pyramid Vision Transformer model class, based on\n    `\"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions\" <https://arxiv.org/abs/2102.12122>`_  # noqa: E501\n\n    Args:\n        img_size(int) : size of a input image.\n        patch_size (int) : size of a single image patch.\n        in_chans (int) : number the channels of the input. Default: 3.\n        num_classes (int) : number of classification classes. Default: 1000.\n        embed_dims (list) : how many hidden dim in each PatchEmbed.\n        num_heads (list) : number of attention head in each stage.\n        mlp_ratios (list): ratios of MLP hidden dims in each stage.\n        qkv_bias(bool) : use bias in attention.\n        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.\n        drop_rate(float) : The drop rate for each block. Default: 0.0.\n        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.\n        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.\n        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.\n        depths (list) : number of Blocks.\n        sr_ratios(list) : stride and kernel size of each attention.\n        num_stages(int) : number of stage. Default: 4.\n    \"\"\"\n\n    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 320, 512],\n                 num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True, qk_scale=None, drop_rate=0.0,\n                 attn_drop_rate=0.0, drop_path_rate=0.0, norm_layer=nn.LayerNorm,\n                 depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], num_stages=4):\n        super(PyramidVisionTransformer, self).__init__()\n        self.num_classes = num_classes\n        self.depths = depths\n        self.num_stages = num_stages\n        start = Tensor(0, mindspore.float32)\n        stop = Tensor(drop_path_rate, mindspore.float32)\n        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule\n        cur = 0\n        b_list = []\n        self.pos_embed = []\n        self.pos_drop = Dropout(p=drop_rate)\n        for i in range(num_stages):\n            block = nn.CellList(\n                [Block(dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,\n                       qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j],\n                       norm_layer=norm_layer, sr_ratio=sr_ratios[i])\n                 for j in range(depths[i])\n                 ])\n\n            b_list.append(block)\n            cur += depths[0]\n\n        self.patch_embed1 = PatchEmbed(img_size=img_size,\n                                       patch_size=patch_size,\n                                       in_chans=in_chans,\n                                       embed_dim=embed_dims[0])\n        num_patches = self.patch_embed1.num_patches\n        self.pos_embed1 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[0]), mindspore.float16))\n        self.pos_drop1 = Dropout(p=drop_rate)\n\n        self.patch_embed2 = PatchEmbed(img_size=img_size // (2 ** (1 + 1)),\n                                       patch_size=2,\n                                       in_chans=embed_dims[1 - 1],\n                                       embed_dim=embed_dims[1])\n        num_patches = self.patch_embed2.num_patches\n        self.pos_embed2 = 
mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[1]), mindspore.float16))\n        self.pos_drop2 = Dropout(p=drop_rate)\n\n        self.patch_embed3 = PatchEmbed(img_size=img_size // (2 ** (2 + 1)),\n                                       patch_size=2,\n                                       in_chans=embed_dims[2 - 1],\n                                       embed_dim=embed_dims[2])\n        num_patches = self.patch_embed3.num_patches\n        self.pos_embed3 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[2]), mindspore.float16))\n        self.pos_drop3 = Dropout(p=drop_rate)\n\n        self.patch_embed4 = PatchEmbed(img_size // (2 ** (3 + 1)),\n                                       patch_size=2,\n                                       in_chans=embed_dims[3 - 1],\n                                       embed_dim=embed_dims[3])\n        num_patches = self.patch_embed4.num_patches + 1\n        self.pos_embed4 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[3]), mindspore.float16))\n        self.pos_drop4 = Dropout(p=drop_rate)\n        self.Blocks = nn.CellList(b_list)\n\n        self.norm = norm_layer([embed_dims[3]])\n\n        # cls_token\n        self.cls_token = mindspore.Parameter(ops.zeros((1, 1, embed_dims[3]), mindspore.float32))\n\n        # classification head\n        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()\n        self.reshape = ops.reshape\n        self.transpose = ops.transpose\n        self.tile = ops.Tile()\n        self.Concat = ops.Concat(axis=1)\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),\n                                                             cell.weight.shape, cell.weight.dtype))\n                if isinstance(cell, nn.Dense) and cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Conv2d):\n                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                fan_out //= cell.group\n                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),\n                                                             cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n\n    def get_classifier(self):\n        return self.head\n\n    def reset_classifier(self, num_classes, global_pool=\"\"):\n        self.num_classes = num_classes\n        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()\n\n    def _get_pos_embed(self, pos_embed, ph, pw, H, W):\n        if H * W == self.patch_embed1.num_patches:\n            return pos_embed\n        else:\n            pos_embed = self.transpose(self.reshape(pos_embed, (1, ph, pw, -1)), (0, 3, 1, 2))\n            resize_bilinear = ops.ResizeBilinear((H, W))\n            
pos_embed = resize_bilinear(pos_embed)\n\n            pos_embed = self.transpose(self.reshape(pos_embed, (1, -1, H * W)), (0, 2, 1))\n\n            return pos_embed\n\n    def forward_features(self, x):\n        B = x.shape[0]\n\n        x, (H, W) = self.patch_embed1(x)\n        pos_embed = self.pos_embed1\n        x = self.pos_drop1(x + pos_embed)\n        for blk in self.Blocks[0]:\n            x = blk(x, H, W)\n        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))\n\n        x, (H, W) = self.patch_embed2(x)\n        ph, pw = self.patch_embed2.H, self.patch_embed2.W\n        pos_embed = self._get_pos_embed(self.pos_embed2, ph, pw, H, W)\n        x = self.pos_drop2(x + pos_embed)\n        for blk in self.Blocks[1]:\n            x = blk(x, H, W)\n        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))\n\n        x, (H, W) = self.patch_embed3(x)\n        ph, pw = self.patch_embed3.H, self.patch_embed3.W\n        pos_embed = self._get_pos_embed(self.pos_embed3, ph, pw, H, W)\n        x = self.pos_drop3(x + pos_embed)\n        for blk in self.Blocks[2]:\n            x = blk(x, H, W)\n        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))\n\n        x, (H, W) = self.patch_embed4(x)\n        cls_tokens = self.tile(self.cls_token, (B, 1, 1))\n\n        x = self.Concat((cls_tokens, x))\n        ph, pw = self.patch_embed4.H, self.patch_embed4.W\n        pos_embed_ = self._get_pos_embed(self.pos_embed4[:, 1:], ph, pw, H, W)\n        pos_embed = self.Concat((self.pos_embed4[:, 0:1], pos_embed_))\n        x = self.pos_drop4(x + pos_embed)\n        for blk in self.Blocks[3]:\n            x = blk(x, H, W)\n\n        x = self.norm(x)\n\n        return x[:, 0]\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.head(x)\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n\n        return x\n
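
To illustrate the stochastic depth decay rule computed in the constructor above, here is a NumPy sketch of the same linspace schedule (drop_path_rate=0.1 and depths=[2, 2, 2, 2] are assumed for illustration).

import numpy as np\n\ndepths = [2, 2, 2, 2]   # per-stage block counts (PVT-Tiny default)\ndrop_path_rate = 0.1    # assumed value for illustration\n# Drop-path rates grow linearly from 0 to drop_path_rate over all blocks.\ndpr = np.linspace(0, drop_path_rate, sum(depths)).tolist()\nprint([round(r, 4) for r in dpr])\n# [0.0, 0.0143, 0.0286, 0.0429, 0.0571, 0.0714, 0.0857, 0.1]\n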
"},{"location":"reference/models/#mindcv.models.pvt.pvt_large","title":"mindcv.models.pvt.pvt_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVT large model. Refer to the base class \"models.PVT\" for more details.

Source code in mindcv/models/pvt.py
@register_model\ndef pvt_large(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformer:\n\"\"\"Get PVT large model\n    Refer to the base class \"models.PVT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs['pvt_large']\n    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,\n                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],\n                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3],\n                                     sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvt.pvt_medium","title":"mindcv.models.pvt.pvt_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVT medium model. Refer to the base class \"models.PVT\" for more details.

Source code in mindcv/models/pvt.py
@register_model\ndef pvt_medium(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformer:\n\"\"\"Get PVT medium model\n    Refer to the base class \"models.PVT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs['pvt_medium']\n    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,\n                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],\n                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3],\n                                     sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvt.pvt_small","title":"mindcv.models.pvt.pvt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVT small model. Refer to the base class \"models.PVT\" for more details.

Source code in mindcv/models/pvt.py
@register_model\ndef pvt_small(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformer:\n\"\"\"Get PVT small model\n    Refer to the base class \"models.PVT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs['pvt_small']\n    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,\n                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],\n                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3],\n                                     sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvt.pvt_tiny","title":"mindcv.models.pvt.pvt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVT tiny model. Refer to the base class \"models.PVT\" for more details.

Source code in mindcv/models/pvt.py
@register_model\ndef pvt_tiny(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformer:\n\"\"\"Get PVT tiny model\n    Refer to the base class \"models.PVT\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs['pvt_tiny']\n    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,\n                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],\n                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2],\n                                     sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
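
Example (not part of the library source): because the factories above are decorated with @register_model, they can also be reached by name through MindCV's create_model registry helper (assumed import path: mindcv.models.create_model).

from mindcv.models import create_model  # create_model is MindCV's registry factory (assumed available here)\n\n# Build PVT-Tiny by its registered name; extra keyword arguments are forwarded to the factory.\nmodel = create_model('pvt_tiny', pretrained=False, num_classes=1000)\nprint(type(model).__name__)  # PyramidVisionTransformer\n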
"},{"location":"reference/models/#pvtv2","title":"pvtv2","text":""},{"location":"reference/models/#mindcv.models.pvtv2.PyramidVisionTransformerV2","title":"mindcv.models.pvtv2.PyramidVisionTransformerV2","text":"

Bases: nn.Cell

Pyramid Vision Transformer V2 model class, based on \"PVTv2: Improved Baselines with Pyramid Vision Transformer\" <https://arxiv.org/abs/2106.13797>_

PARAMETER DESCRIPTION img_size(int)

size of an input image.

patch_size

size of a single image patch.

TYPE: int DEFAULT: 16

in_chans

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

embed_dims

hidden dims of the patch embedding in each stage.

TYPE: list DEFAULT: [64, 128, 256, 512]

num_heads

number of attention heads in each stage.

TYPE: list DEFAULT: [1, 2, 4, 8]

mlp_ratios

ratios of MLP hidden dims in each stage.

TYPE: list DEFAULT: [4, 4, 4, 4]

qkv_bias(bool)

whether to use bias in attention.

qk_scale(float)

scale applied to qk in attention (if not None); otherwise head_dim ** -0.5 is used.

drop_rate(float)

The drop rate for each block. Default: 0.0.

attn_drop_rate(float)

The drop rate for attention. Default: 0.0.

drop_path_rate(float)

The drop rate for drop path. Default: 0.0.

norm_layer(nn.Cell)

Norm layer that will be used in blocks. Default: nn.LayerNorm.

depths

number of blocks in each stage.

TYPE: list DEFAULT: [3, 4, 6, 3]

sr_ratios(list)

stride and kernel size of the spatial-reduction attention in each stage.

num_stages(int)

number of stages. Default: 4.

linear(bool)

whether to use linear spatial-reduction attention (SRA).

Source code in mindcv/models/pvtv2.py
class PyramidVisionTransformerV2(nn.Cell):\nr\"\"\"Pyramid Vision Transformer V2 model class, based on\n    `\"PVTv2: Improved Baselines with Pyramid Vision Transformer\" <https://arxiv.org/abs/2106.13797>`_\n\n    Args:\n        img_size(int) : size of a input image.\n        patch_size (int) : size of a single image patch.\n        in_chans (int) : number the channels of the input. Default: 3.\n        num_classes (int) : number of classification classes. Default: 1000.\n        embed_dims (list) : how many hidden dim in each PatchEmbed.\n        num_heads (list) : number of attention head in each stage.\n        mlp_ratios (list): ratios of MLP hidden dims in each stage.\n        qkv_bias(bool) : use bias in attention.\n        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.\n        drop_rate(float) : The drop rate for each block. Default: 0.0.\n        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.\n        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.\n        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.\n        depths (list) : number of Blocks.\n        sr_ratios(list) : stride and kernel size of each attention.\n        num_stages(int) : number of stage. Default: 4.\n        linear(bool) :  use linear SRA.\n    \"\"\"\n\n    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],\n                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,\n                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,\n                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4, linear=False):\n        super().__init__()\n        self.num_classes = num_classes\n        self.depths = depths\n        self.num_stages = num_stages\n\n        start = Tensor(0, mindspore.float32)\n        stop = Tensor(drop_path_rate, mindspore.float32)\n        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule\n        cur = 0\n\n        patch_embed_list = []\n        block_list = []\n        norm_list = []\n\n        for i in range(num_stages):\n            patch_embed = OverlapPatchEmbed(img_size=img_size if i == 0 else img_size // (2 ** (i + 1)),\n                                            patch_size=7 if i == 0 else 3,\n                                            stride=4 if i == 0 else 2,\n                                            in_chans=in_chans if i == 0 else embed_dims[i - 1],\n                                            embed_dim=embed_dims[i])\n\n            block = nn.CellList([Block(\n                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,\n                sr_ratio=sr_ratios[i], linear=linear, block_id=j)\n                for j in range(depths[i])])\n\n            norm = norm_layer([embed_dims[i]])\n\n            cur += depths[i]\n\n            patch_embed_list.append(patch_embed)\n            block_list.append(block)\n            norm_list.append(norm)\n        self.patch_embed_list = nn.CellList(patch_embed_list)\n        self.block_list = nn.CellList(block_list)\n        self.norm_list = nn.CellList(norm_list)\n        # classification head\n        self.head = nn.Dense(embed_dims[3], num_classes) if 
num_classes > 0 else Identity()\n        self._initialize_weights()\n\n    def freeze_patch_emb(self):\n        self.patch_embed_list[0].requires_grad = False\n\n    def _initialize_weights(self):\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),\n                                                             cell.weight.shape, cell.weight.dtype))\n                if isinstance(cell, nn.Dense) and cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Conv2d):\n                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels\n                fan_out //= cell.group\n                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),\n                                                             cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))\n\n    def get_classifier(self):\n        return self.head\n\n    def reset_classifier(self, num_classes, global_pool=\"\"):\n        self.num_classes = num_classes\n        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()\n\n    def forward_features(self, x):\n        B = x.shape[0]\n\n        for i in range(self.num_stages):\n            patch_embed = self.patch_embed_list[i]\n            block = self.block_list[i]\n            norm = self.norm_list[i]\n            x, H, W = patch_embed(x)\n            for blk in block:\n                x = blk(x, H, W)\n            x = norm(x)\n            if i != self.num_stages - 1:\n                x = ops.transpose(ops.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))\n\n        return x.mean(axis=1)\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.head(x)\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n\n        return x\n
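
Example (not part of the library source): the linear flag of the constructor above enables linear spatial-reduction attention; since the factories below forward extra keyword arguments to the constructor, it can be toggled like this (a sketch with untrained weights).

from mindcv.models.pvtv2 import pvt_v2_b2\n\n# PVTv2-B2 with linear SRA instead of strided-convolution SRA.\nmodel = pvt_v2_b2(pretrained=False, num_classes=1000, linear=True)\n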
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b0","title":"mindcv.models.pvtv2.pvt_v2_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b0 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b0(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b0 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b0\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b1","title":"mindcv.models.pvtv2.pvt_v2_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b1 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b1(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b1 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b1\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b2","title":"mindcv.models.pvtv2.pvt_v2_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b2 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b2(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b2 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b2\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b3","title":"mindcv.models.pvtv2.pvt_v2_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b3 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b3(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b3 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b3\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b4","title":"mindcv.models.pvtv2.pvt_v2_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b4 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b4(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b4 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b4\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.pvtv2.pvt_v2_b5","title":"mindcv.models.pvtv2.pvt_v2_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get PVTV2-b5 model. Refer to the base class \"models.PVTv2\" for more details.

Source code in mindcv/models/pvtv2.py
@register_model\ndef pvt_v2_b5(\n    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs\n) -> PyramidVisionTransformerV2:\n\"\"\"Get PVTV2-b5 model\n    Refer to the base class \"models.PVTv2\" for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"pvt_v2_b5\"]\n    model = PyramidVisionTransformerV2(\n        in_chans=in_channels, num_classes=num_classes,\n        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
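The b0 to b5 factories above share one signature and differ only in embed_dims, depths and mlp_ratios. A rough way to compare the resulting model sizes is sketched here; count_params is a small helper defined for this example, and it assumes MindSpore's Cell.trainable_params and Tensor.size behave as usual:

from mindcv.models import pvt_v2_b0, pvt_v2_b2

def count_params(net):
    # Total number of trainable parameter elements in the network.
    return sum(p.size for p in net.trainable_params())

print(count_params(pvt_v2_b0()))  # smallest preset: depths (2, 2, 2, 2), embed_dims (32, 64, 160, 256)
print(count_params(pvt_v2_b2()))  # larger preset: depths (3, 4, 6, 3), embed_dims (64, 128, 320, 512)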
"},{"location":"reference/models/#regnet","title":"regnet","text":""},{"location":"reference/models/#mindcv.models.regnet.regnet_x_12gf","title":"mindcv.models.regnet.regnet_x_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_12gf\"]\n    model = RegNet(73.36, 168, 2.37, 19, 112, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_16gf","title":"mindcv.models.regnet.regnet_x_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_16gf\"]\n    model = RegNet(55.59, 216, 2.1, 22, 128, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_1_6gf","title":"mindcv.models.regnet.regnet_x_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_1_6gf\"]\n    model = RegNet(34.01, 80, 2.25, 18, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_200mf","title":"mindcv.models.regnet.regnet_x_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_200mf\"]\n    model = RegNet(36.44, 24, 2.49, 13, 8, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_32gf","title":"mindcv.models.regnet.regnet_x_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_32gf\"]\n    model = RegNet(69.86, 320, 2.0, 23, 168, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_3_2gf","title":"mindcv.models.regnet.regnet_x_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_3_2gf\"]\n    model = RegNet(26.31, 88, 2.25, 25, 48, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_400mf","title":"mindcv.models.regnet.regnet_x_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_400mf\"]\n    model = RegNet(24.48, 24, 2.54, 22, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_4_0gf","title":"mindcv.models.regnet.regnet_x_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_4_0gf\"]\n    model = RegNet(38.65, 96, 2.43, 23, 40, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_600mf","title":"mindcv.models.regnet.regnet_x_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_600mf\"]\n    model = RegNet(36.97, 48, 2.24, 16, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_6_4gf","title":"mindcv.models.regnet.regnet_x_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_6_4gf\"]\n    model = RegNet(60.83, 184, 2.07, 17, 56, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_800mf","title":"mindcv.models.regnet.regnet_x_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_800mf\"]\n    model = RegNet(35.73, 56, 2.28, 16, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_x_8_0gf","title":"mindcv.models.regnet.regnet_x_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_x_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_x_8_0gf\"]\n    model = RegNet(49.56, 80, 2.88, 23, 120, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_12gf","title":"mindcv.models.regnet.regnet_y_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_12gf\"]\n    model = RegNet(73.36, 168, 2.37, 19, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_16gf","title":"mindcv.models.regnet.regnet_y_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_16gf\"]\n    model = RegNet(106.23, 200, 2.48, 18, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_1_6gf","title":"mindcv.models.regnet.regnet_y_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_1_6gf\"]\n    model = RegNet(20.71, 48, 2.65, 27, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_200mf","title":"mindcv.models.regnet.regnet_y_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_200mf\"]\n    model = RegNet(36.44, 24, 2.49, 13, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_32gf","title":"mindcv.models.regnet.regnet_y_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_32gf\"]\n    model = RegNet(115.89, 232, 2.53, 20, 232, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_3_2gf","title":"mindcv.models.regnet.regnet_y_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_3_2gf\"]\n    model = RegNet(42.63, 80, 2.66, 21, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_400mf","title":"mindcv.models.regnet.regnet_y_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_400mf\"]\n    model = RegNet(27.89, 48, 2.09, 16, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_4_0gf","title":"mindcv.models.regnet.regnet_y_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_4_0gf\"]\n    model = RegNet(31.41, 96, 2.24, 22, 64, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_600mf","title":"mindcv.models.regnet.regnet_y_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_600mf\"]\n    model = RegNet(32.54, 48, 2.32, 15, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_6_4gf","title":"mindcv.models.regnet.regnet_y_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_6_4gf\"]\n    model = RegNet(33.22, 112, 2.27, 25, 72, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_800mf","title":"mindcv.models.regnet.regnet_y_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_800mf\"]\n    model = RegNet(38.84, 56, 2.4, 14, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.regnet.regnet_y_8_0gf","title":"mindcv.models.regnet.regnet_y_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/regnet.py
@register_model\ndef regnet_y_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"regnet_y_8_0gf\"]\n    model = RegNet(76.82, 192, 2.19, 17, 56, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
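All of the RegNet variants above are registered under the names used in their function definitions, so they can also be built through the model registry. A minimal sketch, assuming mindcv.create_model is available as in the MindCV quick start:

import mindcv

# RegNet-X 800MF: no Squeeze-and-Excitation.
model_x = mindcv.create_model("regnet_x_800mf", pretrained=False, num_classes=1000)

# RegNet-Y 800MF: same design space plus SE blocks (se_r=0.25 in the factory above).
model_y = mindcv.create_model("regnet_y_800mf", pretrained=False, num_classes=1000)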
"},{"location":"reference/models/#repmlp","title":"repmlp","text":""},{"location":"reference/models/#mindcv.models.repmlp.RepMLPNet","title":"mindcv.models.repmlp.RepMLPNet","text":"

Bases: nn.Cell

RepMLPNet model class, based on \"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality\" <https://arxiv.org/pdf/2112.11081v2.pdf>

PARAMETER DESCRIPTION in_channels

number of input channels. Default: 3.

DEFAULT: 3

num_classes

number of classification classes. Default: 1000.

patch_size

size of a single image patch. Default: (4, 4)

DEFAULT: (4, 4)

num_blocks

number of blocks per stage. Default: (2,2,6,2)

DEFAULT: (2, 2, 6, 2)

channels

per-stage channel widths; channels[stage_idx] gives a stage's input channels and channels[stage_idx + 1] its output channels. Default: (192,384,768,1536)

DEFAULT: (192, 384, 768, 1536)

hs

feature map height at each stage. Default: (64,32,16,8)

DEFAULT: (64, 32, 16, 8)

ws

feature map width at each stage. Default: (64,32,16,8)

DEFAULT: (64, 32, 16, 8)

sharesets_nums

number of share sets per stage. Default: (4,8,16,32)

DEFAULT: (4, 8, 16, 32)

reparam_conv_k

convolution kernel size in local Perceptron. Default: (3,)

DEFAULT: (3,)

globalperceptron_reduce

channel reduction of the intermediate convolution in the global perceptron (out_channel = in_channel / globalperceptron_reduce). Default: 4

DEFAULT: 4

use_checkpoint

whether to use checkpointing

DEFAULT: False

deploy

whether to build the blocks in their re-parameterized (deploy) form

DEFAULT: False

Source code in mindcv/models/repmlp.py
class RepMLPNet(nn.Cell):\nr\"\"\"RepMLPNet model class, based on\n    `\"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality\" <https://arxiv.org/pdf/2112.11081v2.pdf>`_\n\n    Args:\n        in_channels: number of input channels. Default: 3.\n        num_classes: number of classification classes. Default: 1000.\n        patch_size: size of a single image patch. Default: (4, 4)\n        num_blocks: number of blocks per stage. Default: (2,2,6,2)\n        channels: number of in_channels(channels[stage_idx]) and out_channels(channels[stage_idx + 1]) per stage.\n            Default: (192,384,768,1536)\n        hs: height of picture per stage. Default: (64,32,16,8)\n        ws: width of picture per stage. Default: (64,32,16,8)\n        sharesets_nums: number of share sets per stage. Default: (4,8,16,32)\n        reparam_conv_k: convolution kernel size in local Perceptron. Default: (3,)\n        globalperceptron_reduce: Intermediate convolution output size\n            (in_channal = inchannal, out_channel = in_channel/globalperceptron_reduce) in globalperceptron. Default: 4\n        use_checkpoint: whether to use checkpoint\n        deploy: whether to use bias\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=3, num_class=1000,\n                 patch_size=(4, 4),\n                 num_blocks=(2, 2, 6, 2), channels=(192, 384, 768, 1536),\n                 hs=(64, 32, 16, 8), ws=(64, 32, 16, 8),\n                 sharesets_nums=(4, 8, 16, 32),\n                 reparam_conv_k=(3,),\n                 globalperceptron_reduce=4, use_checkpoint=False,\n                 deploy=False):\n        super().__init__()\n        num_stages = len(num_blocks)\n        assert num_stages == len(channels)\n        assert num_stages == len(hs)\n        assert num_stages == len(ws)\n        assert num_stages == len(sharesets_nums)\n\n        self.conv_embedding = conv_bn_relu(in_channels, channels[0], kernel_size=patch_size, stride=patch_size,\n                                           padding=0, has_bias=False)\n        self.conv2d = nn.Conv2d(in_channels, channels[0], kernel_size=patch_size, stride=patch_size, padding=0)\n\n        stages = []\n        embeds = []\n        for stage_idx in range(num_stages):\n            stage_blocks = [RepMLPNetUnit(channels=channels[stage_idx], h=hs[stage_idx], w=ws[stage_idx],\n                                          reparam_conv_k=reparam_conv_k,\n                                          globalperceptron_reduce=globalperceptron_reduce, ffn_expand=4,\n                                          num_sharesets=sharesets_nums[stage_idx],\n                                          deploy=deploy) for _ in range(num_blocks[stage_idx])]\n            stages.append(nn.CellList(stage_blocks))\n            if stage_idx < num_stages - 1:\n                embeds.append(\n                    conv_bn_relu(in_channels=channels[stage_idx], out_channels=channels[stage_idx + 1], kernel_size=2,\n                                 stride=2, padding=0))\n        self.stages = nn.CellList(stages)\n        self.embeds = nn.CellList(embeds)\n        self.head_norm = nn.BatchNorm2d(channels[-1]).set_train()\n        self.head = nn.Dense(channels[-1], num_class)\n\n        self.use_checkpoint = use_checkpoint\n        self.shape = ops.Shape()\n        self.reshape = ops.Reshape()\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        for name, cell in self.cells_and_names():\n            if 
isinstance(cell, nn.Conv2d):\n                k = cell.group / (cell.in_channels * cell.kernel_size[0] * cell.kernel_size[1])\n                k = k ** 0.5\n                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.Dense):\n                k = 1 / cell.in_channels\n                k = k ** 0.5\n                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.conv_embedding(x)\n\n        for i, stage in enumerate(self.stages):\n            for block in stage:\n                x = block(x)\n\n            if i < len(self.stages) - 1:\n                embed = self.embeds[i]\n                x = embed(x)\n        x = self.head_norm(x)\n        shape = self.shape(x)\n        pool = nn.AvgPool2d(kernel_size=(shape[2], shape[3]))\n        x = pool(x)\n        return x.view(shape[0], -1)\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.head(x)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        return self.forward_head(x)\n
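Because the RepMLP blocks mix tokens with fully connected layers over fixed feature-map sizes, the spatial input size is tied to patch_size and hs/ws (input height = patch_size[0] * hs[0]). A minimal sketch using the repmlp_t224 preset documented below, where 4 * 56 = 224, with a random input purely for illustration:

import numpy as np
import mindspore as ms
from mindcv.models import repmlp_t224

# hs = (56, 28, 14, 7) with patch_size (4, 4): this preset expects 224x224 inputs.
model = repmlp_t224(pretrained=False, num_classes=1000)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 1000)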
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_b224","title":"mindcv.models.repmlp.repmlp_b224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_b224 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_b224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_b224 model.\n    Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_b224\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(56, 28, 14, 7),\n                      ws=(56, 28, 14, 7),\n                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),\n                      deploy=deploy)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_b256","title":"mindcv.models.repmlp.repmlp_b256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_b256 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_b256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_b256 model.\n    Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_b256\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),\n                      ws=(64, 32, 16, 8),\n                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),\n                      deploy=deploy)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_d256","title":"mindcv.models.repmlp.repmlp_d256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_d256 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_d256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_d256 model.\n    Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_d256\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(80, 160, 320, 640), hs=(64, 32, 16, 8),\n                      ws=(64, 32, 16, 8),\n                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),\n                      deploy=deploy)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_l256","title":"mindcv.models.repmlp.repmlp_l256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_l256 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_l256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_l256 model.\n    Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_l256\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),\n                      ws=(64, 32, 16, 8),\n                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 256),\n                      deploy=deploy)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_t224","title":"mindcv.models.repmlp.repmlp_t224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_t224 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_t224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_t224 model. Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_t224\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(56, 28, 14, 7),\n                      ws=(56, 28, 14, 7),\n                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),\n                      deploy=deploy)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.repmlp.repmlp_t256","title":"mindcv.models.repmlp.repmlp_t256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs)","text":"

Get repmlp_t256 model. Refer to the base class models.RepMLPNet for more details.

Source code in mindcv/models/repmlp.py
@register_model\ndef repmlp_t256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,\n                deploy=False, **kwargs):\n\"\"\"Get repmlp_t256 model.\n    Refer to the base class `models.RepMLPNet` for more details.\"\"\"\n    default_cfg = default_cfgs[\"repmlp_t256\"]\n    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(64, 32, 16, 8),\n                      ws=(64, 32, 16, 8),\n                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),\n                      deploy=deploy)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
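Since every preset above is decorated with @register_model, the registry can enumerate them. A sketch assuming list_models is exported from mindcv.models; the name filtering is done client-side here rather than relying on any particular filter argument:

from mindcv.models import list_models

# Collect the registered RepMLP preset names, e.g. repmlp_t224, repmlp_b256, ...
repmlp_names = sorted(n for n in list_models() if n.startswith("repmlp"))
print(repmlp_names)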
"},{"location":"reference/models/#repvgg","title":"repvgg","text":""},{"location":"reference/models/#mindcv.models.repvgg.RepVGG","title":"mindcv.models.repvgg.RepVGG","text":"

Bases: nn.Cell

RepVGG model class, based on \"RepVGG: Making VGG-style ConvNets Great Again\" <https://arxiv.org/pdf/2101.03697>

PARAMETER DESCRIPTION num_blocks

number of RepVGGBlocks in each stage.

TYPE: list

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

width_multiplier

width multipliers applied to the base stage widths (64, 128, 256, 512).

TYPE: list DEFAULT: None

override_group_map

optional mapping from block index to the number of groups used by that block's convolution.

TYPE: dict DEFAULT: None

deploy

use rbr_reparam block or not. Default: False

TYPE: bool DEFAULT: False

use_se

use se_block or not. Default: False

TYPE: bool DEFAULT: False

Source code in mindcv/models/repvgg.py
class RepVGG(nn.Cell):\nr\"\"\"RepVGG model class, based on\n    `\"RepVGGBlock: An all-MLP Architecture for Vision\" <https://arxiv.org/pdf/2101.03697>`_\n\n    Args:\n        num_blocks (list) : number of RepVGGBlocks\n        num_classes (int) : number of classification classes. Default: 1000.\n        in_channels (in_channels) : number the channels of the input. Default: 3.\n        width_multiplier (list) : the numbers of MLP Architecture.\n        override_group_map (dict) : the numbers of MLP Architecture.\n        deploy (bool) : use rbr_reparam block or not. Default: False\n        use_se (bool) : use se_block or not. Default: False\n    \"\"\"\n\n    def __init__(self, num_blocks, num_classes=1000, in_channels=3, width_multiplier=None, override_group_map=None,\n                 deploy=False, use_se=False):\n        super().__init__()\n\n        assert len(width_multiplier) == 4\n\n        self.deploy = deploy\n        self.override_group_map = override_group_map or {}\n        self.use_se = use_se\n\n        assert 0 not in self.override_group_map\n\n        self.in_planes = min(64, int(64 * width_multiplier[0]))\n\n        self.stage0 = RepVGGBlock(in_channels=in_channels, out_channels=self.in_planes, kernel_size=3, stride=2,\n                                  padding=1,\n                                  deploy=self.deploy, use_se=self.use_se)\n        self.feature_info = [dict(chs=self.in_planes, reduction=2, name=\"stage0\")]\n        self.cur_layer_idx = 1\n        self.stage1 = self._make_stage(\n            int(64 * width_multiplier[0]), num_blocks[0], stride=2)\n        self.feature_info.append(dict(chs=int(64 * width_multiplier[0]), reduction=4, name=\"stage1\"))\n        self.stage2 = self._make_stage(\n            int(128 * width_multiplier[1]), num_blocks[1], stride=2)\n        self.feature_info.append(dict(chs=int(128 * width_multiplier[1]), reduction=8, name=\"stage2\"))\n        self.stage3 = self._make_stage(\n            int(256 * width_multiplier[2]), num_blocks[2], stride=2)\n        self.feature_info.append(dict(chs=int(256 * width_multiplier[2]), reduction=16, name=\"stage3\"))\n        self.stage4 = self._make_stage(\n            int(512 * width_multiplier[3]), num_blocks[3], stride=2)\n        self.feature_info.append(dict(chs=int(512 * width_multiplier[3]), reduction=32, name=\"stage4\"))\n        self.gap = GlobalAvgPooling()\n        self.linear = nn.Dense(int(512 * width_multiplier[3]), num_classes)\n        self._initialize_weights()\n\n    def _make_stage(self, planes, num_blocks, stride):\n        strides = [stride] + [1] * (num_blocks - 1)\n        blocks = []\n        for s in strides:\n            cur_group = self.override_group_map.get(self.cur_layer_idx, 1)\n            blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,\n                                      stride=s, padding=1, group=cur_group, deploy=self.deploy,\n                                      use_se=self.use_se))\n            self.in_planes = planes\n            self.cur_layer_idx += 1\n\n        return nn.SequentialCell(blocks)\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not 
None:\n                    cell.bias.set_data(\n                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n\n    def construct(self, x):\n        x = self.stage0(x)\n        x = self.stage1(x)\n        x = self.stage2(x)\n        x = self.stage3(x)\n        x = self.stage4(x)\n        x = self.gap(x)\n        x = self.linear(x)\n        return x\n
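The deploy flag above only changes how each RepVGGBlock is constructed: the multi-branch (3x3 + 1x1 + identity) training form when False, the single fused 3x3 convolution form when True. A minimal sketch building the training-time form with the repvgg_a0 factory documented below, using a random input purely for illustration:

import numpy as np
import mindspore as ms
from mindcv.models import repvgg_a0

# deploy defaults to False in the factory, so each block keeps its separate branches.
model = repvgg_a0(pretrained=False, num_classes=1000)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 1000)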
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_a0","title":"mindcv.models.repvgg.repvgg_a0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_a0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5].\n    Refer to the base class `models.RepVGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"repvgg_a0\"]\n    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[0.75, 0.75, 0.75, 2.5], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_a1","title":"mindcv.models.repvgg.repvgg_a1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_a1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs[\"repvgg_a1\"]\n    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_a2","title":"mindcv.models.repvgg.repvgg_a2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_a2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs[\"repvgg_a2\"]\n    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[1.5, 1.5, 1.5, 2.75], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b0","title":"mindcv.models.repvgg.repvgg_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs['repvgg_b0']\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b1","title":"mindcv.models.repvgg.repvgg_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs['repvgg_b1']\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b1g2","title":"mindcv.models.repvgg.repvgg_b1g2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b1g2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].\n    Refer to the base class `models.RepVGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"repvgg_b1g2\"]\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g2_map, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b1g4","title":"mindcv.models.repvgg.repvgg_b1g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b1g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].\n    Refer to the base class `models.RepVGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"repvgg_b1g4\"]\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g4_map, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b2","title":"mindcv.models.repvgg.repvgg_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs['repvgg_b2']\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b2g4","title":"mindcv.models.repvgg.repvgg_b2g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b2g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].\n    Refer to the base class `models.RepVGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"repvgg_b2g4\"]\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=g4_map, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.repvgg.repvgg_b3","title":"mindcv.models.repvgg.repvgg_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0]. Refer to the base class models.RepVGG for more details.

Source code in mindcv/models/repvgg.py
@register_model\ndef repvgg_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:\n\"\"\"Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0].\n     Refer to the base class `models.RepVGG` for more details.\n     \"\"\"\n    default_cfg = default_cfgs['repvgg_b3']\n    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,\n                      width_multiplier=[3.0, 3.0, 3.0, 5.0], override_group_map=None, deploy=False, **kwargs)\n    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#res2net","title":"res2net","text":""},{"location":"reference/models/#mindcv.models.res2net.Res2Net","title":"mindcv.models.res2net.Res2Net","text":"

Bases: nn.Cell

Res2Net model class, based on \"Res2Net: A New Multi-scale Backbone Architecture\" <https://arxiv.org/abs/1904.01169>

PARAMETER DESCRIPTION block

block of resnet.

TYPE: Type[nn.Cell]

layer_nums

number of layers of each stage.

TYPE: List[int]

version

variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.

TYPE: str DEFAULT: 'res2net'

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

groups

number of groups for group conv in blocks. Default: 1.

TYPE: int DEFAULT: 1

base_width

base width per group of the hidden channels in blocks. Default: 26.

TYPE: int DEFAULT: 26

scale

scale factor of Bottle2neck. Default: 4.

DEFAULT: 4

norm

normalization layer in blocks. Default: None.

TYPE: Optional[nn.Cell] DEFAULT: None

Source code in mindcv/models/res2net.py
class Res2Net(nn.Cell):\nr\"\"\"Res2Net model class, based on\n    `\"Res2Net: A New Multi-scale Backbone Architecture\" <https://arxiv.org/abs/1904.01169>`_\n\n    Args:\n        block: block of resnet.\n        layer_nums: number of layers of each stage.\n        version: variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number the channels of the input. Default: 3.\n        groups: number of groups for group conv in blocks. Default: 1.\n        base_width: base width of pre group hidden channel in blocks. Default: 26.\n        scale: scale factor of Bottle2neck. Default: 4.\n        norm: normalization layer in blocks. Default: None.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[nn.Cell],\n        layer_nums: List[int],\n        version: str = \"res2net\",\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        groups: int = 1,\n        base_width: int = 26,\n        scale=4,\n        norm: Optional[nn.Cell] = None,\n    ) -> None:\n        super().__init__()\n        assert version in [\"res2net\", \"res2net_v1b\"]\n        self.version = version\n\n        if norm is None:\n            norm = nn.BatchNorm2d\n        self.norm = norm\n\n        self.num_classes = num_classes\n        self.input_channels = 64\n        self.groups = groups\n        self.base_width = base_width\n        self.scale = scale\n        if self.version == \"res2net\":\n            self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,\n                                   stride=2, padding=3, pad_mode=\"pad\")\n        elif self.version == \"res2net_v1b\":\n            self.conv1 = nn.SequentialCell([\n                nn.Conv2d(in_channels, self.input_channels // 2, kernel_size=3,\n                          stride=2, padding=1, pad_mode=\"pad\"),\n                norm(self.input_channels // 2),\n                nn.ReLU(),\n                nn.Conv2d(self.input_channels // 2, self.input_channels // 2, kernel_size=3,\n                          stride=1, padding=1, pad_mode=\"pad\"),\n                norm(self.input_channels // 2),\n                nn.ReLU(),\n                nn.Conv2d(self.input_channels // 2, self.input_channels, kernel_size=3,\n                          stride=1, padding=1, pad_mode=\"pad\"),\n            ])\n\n        self.bn1 = norm(self.input_channels)\n        self.relu = nn.ReLU()\n        self.max_pool = nn.SequentialCell([\n            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode=\"CONSTANT\"),\n            nn.MaxPool2d(kernel_size=3, stride=2)\n        ])\n        self.layer1 = self._make_layer(block, 64, layer_nums[0])\n        self.layer2 = self._make_layer(block, 128, layer_nums[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, layer_nums[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, layer_nums[3], stride=2)\n\n        self.pool = GlobalAvgPooling()\n        self.num_features = 512 * block.expansion\n        self.classifier = nn.Dense(self.num_features, num_classes)\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(math.sqrt(5), mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     
cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                         cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def _make_layer(\n        self,\n        block: Type[nn.Cell],\n        channels: int,\n        block_nums: int,\n        stride: int = 1,\n    ) -> nn.SequentialCell:\n        down_sample = None\n\n        if stride != 1 or self.input_channels != channels * block.expansion:\n            if stride == 1 or self.version == \"res2net\":\n                down_sample = nn.SequentialCell([\n                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),\n                    self.norm(channels * block.expansion)\n                ])\n            else:\n                down_sample = nn.SequentialCell([\n                    nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode=\"same\"),\n                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=1),\n                    self.norm(channels * block.expansion)\n                ])\n\n        layers = []\n        layers.append(\n            block(\n                self.input_channels,\n                channels,\n                stride=stride,\n                down_sample=down_sample,\n                groups=self.groups,\n                base_width=self.base_width,\n                scale=self.scale,\n                stype=\"stage\",\n                norm=self.norm,\n            )\n        )\n        self.input_channels = channels * block.expansion\n\n        for _ in range(1, block_nums):\n            layers.append(\n                block(\n                    self.input_channels,\n                    channels,\n                    groups=self.groups,\n                    base_width=self.base_width,\n                    scale=self.scale,\n                    norm=self.norm,\n                )\n            )\n\n        return nn.SequentialCell(layers)\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.max_pool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
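As the source above shows, version only switches the stem and the downsampling path: 'res2net' uses a single 7x7 stride-2 stem, while 'res2net_v1b' uses a three-layer 3x3 stem and average-pool downsampling in the residual shortcuts. A minimal sketch comparing the two via the factories documented below, with a random input purely for illustration:

import numpy as np
import mindspore as ms
from mindcv.models import res2net50, res2net50_v1b

# Same Bottle2neck layout [3, 4, 6, 3]; only stem / downsample construction differs.
model = res2net50(pretrained=False)        # num_classes defaults to 1001 in this factory
model_v1b = res2net50_v1b(pretrained=False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(model(x).shape, model_v1b(x).shape)  # expected: (1, 1001) (1, 1001)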
"},{"location":"reference/models/#mindcv.models.res2net.res2net101","title":"mindcv.models.res2net.res2net101(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"

Get the 101-layer Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py
@register_model\ndef res2net101(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n\"\"\"Get 101 layers Res2Net model.\n    Refer to the base class `models.Res2Net` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"res2net101\"]\n    model = Res2Net(Bottle2neck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.res2net.res2net101_v1b","title":"mindcv.models.res2net.res2net101_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"Source code in mindcv/models/res2net.py
@register_model\ndef res2net101_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"res2net101_v1b\"]\n    model = Res2Net(Bottle2neck, [3, 4, 23, 3], version=\"res2net_v1b\", num_classes=num_classes,\n                    in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.res2net.res2net152","title":"mindcv.models.res2net.res2net152(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"

Get the 152-layer Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py
@register_model\ndef res2net152(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n\"\"\"Get 152 layers Res2Net model.\n    Refer to the base class `models.Res2Net` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"res2net152\"]\n    model = Res2Net(Bottle2neck, [3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.res2net.res2net152_v1b","title":"mindcv.models.res2net.res2net152_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"Source code in mindcv/models/res2net.py
@register_model\ndef res2net152_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"res2net152_v1b\"]\n    model = Res2Net(Bottle2neck, [3, 8, 36, 3], version=\"res2net_v1b\", num_classes=num_classes,\n                    in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.res2net.res2net50","title":"mindcv.models.res2net.res2net50(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"

Get a 50-layer Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py
@register_model\ndef res2net50(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n\"\"\"Get 50 layers Res2Net model.\n    Refer to the base class `models.Res2Net` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"res2net50\"]\n    model = Res2Net(Bottle2neck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.res2net.res2net50_v1b","title":"mindcv.models.res2net.res2net50_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)","text":"Source code in mindcv/models/res2net.py
@register_model\ndef res2net50_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"res2net50_v1b\"]\n    model = Res2Net(Bottle2neck, [3, 4, 6, 3], version=\"res2net_v1b\", num_classes=num_classes,\n                    in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#resnest","title":"resnest","text":""},{"location":"reference/models/#mindcv.models.resnest.ResNeSt","title":"mindcv.models.resnest.ResNeSt","text":"

Bases: nn.Cell

ResNeSt model class, based on \"ResNeSt: Split-Attention Networks\" (https://arxiv.org/abs/2004.08955)

PARAMETER DESCRIPTION block

Residual block class. The available option is Bottleneck.

TYPE: Type[Bottleneck]

layers

Number of blocks in each of the four stages.

TYPE: List[int]

radix

Number of groups for Split-Attention conv. Default: 1.

TYPE: int DEFAULT: 1

group

Number of groups for the conv in each bottleneck block. Default: 1.

TYPE: int DEFAULT: 1

bottleneck_width

Bottleneck channel width factor. Default: 64.

TYPE: int DEFAULT: 64

num_classes

Number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

dilated

Apply a dilation strategy to the pretrained ResNeSt, yielding a stride-8 model typically used in semantic segmentation. Default: False.

TYPE: bool DEFAULT: False

dilation

Dilation rate of the convolutions. Default: 1.

TYPE: int DEFAULT: 1

deep_stem

Use three 3x3 convolution layers of widths stem_width, stem_width and stem_width * 2 in the stem. Default: False.

TYPE: bool DEFAULT: False

stem_width

number of channels in stem convolutions. Default: 64.

TYPE: int DEFAULT: 64

avg_down

Use average pooling in the projection (downsample) skip connection between stages. Default: False.

TYPE: bool DEFAULT: False

avd

Enable average-pool downsampling inside the residual blocks (placed before or after the split-attention conv according to avd_first). Default: False.

TYPE: bool DEFAULT: False

avd_first

Apply the avd average pooling before the split-attention conv instead of after it. Default: False.

TYPE: bool DEFAULT: False

drop_rate

Drop probability for the Dropout layer. Default: 0.

TYPE: float DEFAULT: 0.0

norm_layer

Normalization layer used in backbone network. Default: nn.BatchNorm2d.

TYPE: nn.Cell DEFAULT: nn.BatchNorm2d

Source code in mindcv/models/resnest.py
class ResNeSt(nn.Cell):\nr\"\"\"ResNeSt model class, based on\n    `\"ResNeSt: Split-Attention Networks\" <https://arxiv.org/abs/2004.08955>`_\n\n    Args:\n        block: Class for the residual block. Option is Bottleneck.\n        layers: Numbers of layers in each block.\n        radix: Number of groups for Split-Attention conv. Default: 1.\n        group: Number of groups for the conv in each bottleneck block. Default: 1.\n        bottleneck_width: bottleneck channels factor. Default: 64.\n        num_classes: Number of classification classes. Default: 1000.\n        dilated: Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model,\n                 typically used in Semantic Segmentation. Default: False.\n        dilation: Number of dilation in the conv. Default: 1.\n        deep_stem: three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2.\n                   Default: False.\n        stem_width: number of channels in stem convolutions. Default: 64.\n        avg_down: use avg pooling for projection skip connection between stages/downsample.\n                  Default: False.\n        avd: use avg pooling before or after split-attention conv. Default: False.\n        avd_first: use avg pooling before or after split-attention conv. Default: False.\n        drop_rate: Drop probability for the Dropout layer. Default: 0.\n        norm_layer: Normalization layer used in backbone network. Default: nn.BatchNorm2d.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[Bottleneck],\n        layers: List[int],\n        radix: int = 1,\n        group: int = 1,\n        bottleneck_width: int = 64,\n        num_classes: int = 1000,\n        dilated: bool = False,\n        dilation: int = 1,\n        deep_stem: bool = False,\n        stem_width: int = 64,\n        avg_down: bool = False,\n        avd: bool = False,\n        avd_first: bool = False,\n        drop_rate: float = 0.0,\n        norm_layer: nn.Cell = nn.BatchNorm2d,\n    ) -> None:\n        super(ResNeSt, self).__init__()\n        self.cardinality = group\n        self.bottleneck_width = bottleneck_width\n        # ResNet-D params\n        self.inplanes = stem_width * 2 if deep_stem else 64\n        self.avg_down = avg_down\n        # ResNeSt params\n        self.radix = radix\n        self.avd = avd\n        self.avd_first = avd_first\n\n        if deep_stem:\n            self.conv1 = nn.SequentialCell([\n                nn.Conv2d(3, stem_width, kernel_size=3, stride=2, pad_mode=\"pad\",\n                          padding=1, has_bias=False),\n                norm_layer(stem_width),\n                nn.ReLU(),\n                nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, pad_mode=\"pad\",\n                          padding=1, has_bias=False),\n                norm_layer(stem_width),\n                nn.ReLU(),\n                nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, pad_mode=\"pad\",\n                          padding=1, has_bias=False),\n            ])\n        else:\n            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode=\"pad\", padding=3,\n                                   has_bias=False)\n\n        self.bn1 = norm_layer(self.inplanes)\n        self.relu = nn.ReLU()\n        self.feature_info = [dict(chs=self.inplanes, reduction=2, name=\"relu\")]\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n\n        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, 
is_first=False)\n        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name='layer1'))\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)\n        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name='layer2'))\n\n        if dilated or dilation == 4:\n            self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 256, reduction=8, name='layer3'))\n            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 512, reduction=8, name='layer4'))\n        elif dilation == 2:\n            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))\n            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 512, reduction=16, name='layer4'))\n        else:\n            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))\n            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)\n            self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name='layer4'))\n\n        self.avgpool = GlobalAvgPooling()\n        self.drop = Dropout(p=drop_rate) if drop_rate > 0.0 else None\n        self.fc = nn.Dense(512 * block.expansion, num_classes)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(\n                        init.HeNormal(mode=\"fan_out\", nonlinearity=\"relu\"), cell.weight.shape, cell.weight.dtype\n                    )\n                )\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(\n                        init.HeUniform(mode=\"fan_in\", nonlinearity=\"sigmoid\"), cell.weight.shape, cell.weight.dtype\n                    )\n                )\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def _make_layer(\n        self,\n        block: Type[Bottleneck],\n        planes: int,\n        blocks: int,\n        stride: int = 1,\n        dilation: int = 1,\n        norm_layer: Optional[nn.Cell] = None,\n        is_first: bool = True,\n    ) -> nn.SequentialCell:\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            down_layers = []\n            if self.avg_down:\n                
if dilation == 1:\n                    down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode=\"valid\"))\n                else:\n                    down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, pad_mode=\"valid\"))\n\n                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1,\n                                             stride=1, has_bias=False))\n            else:\n                down_layers.append(\n                    nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,\n                              has_bias=False))\n            down_layers.append(norm_layer(planes * block.expansion))\n            downsample = nn.SequentialCell(down_layers)\n\n        layers = []\n        if dilation == 1 or dilation == 2:\n            layers.append(\n                block(\n                    self.inplanes,\n                    planes,\n                    stride,\n                    downsample=downsample,\n                    radix=self.radix,\n                    cardinality=self.cardinality,\n                    bottleneck_width=self.bottleneck_width,\n                    avd=self.avd,\n                    avd_first=self.avd_first,\n                    dilation=1,\n                    is_first=is_first,\n                    norm_layer=norm_layer,\n                )\n            )\n        elif dilation == 4:\n            layers.append(\n                block(\n                    self.inplanes,\n                    planes,\n                    stride,\n                    downsample=downsample,\n                    radix=self.radix,\n                    cardinality=self.cardinality,\n                    bottleneck_width=self.bottleneck_width,\n                    avd=self.avd,\n                    avd_first=self.avd_first,\n                    dilation=2,\n                    is_first=is_first,\n                    norm_layer=norm_layer,\n                )\n            )\n        else:\n            raise ValueError(f\"Unsupported model type {dilation}\")\n\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(\n                block(\n                    self.inplanes,\n                    planes,\n                    radix=self.radix,\n                    cardinality=self.cardinality,\n                    bottleneck_width=self.bottleneck_width,\n                    avd=self.avd,\n                    avd_first=self.avd_first,\n                    dilation=dilation,\n                    norm_layer=norm_layer,\n                )\n            )\n\n        return nn.SequentialCell(layers)\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.avgpool(x)\n        if self.drop:\n            x = self.drop(x)\n        x = self.fc(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
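As a hedged sketch of how the split-attention options above fit together, the class can also be constructed directly. The argument values mirror the resnest50 factory shown further below; the Bottleneck import path is assumed from the source listing.

```python
# Hedged construction sketch; `Bottleneck` is defined in mindcv/models/resnest.py
# per the listing above, the exact import path is an assumption.
from mindcv.models.resnest import ResNeSt, Bottleneck

# Roughly the resnest50 recipe used by the factory functions below:
net = ResNeSt(
    block=Bottleneck,
    layers=[3, 4, 6, 3],     # blocks per stage
    radix=2,                 # 2-way split attention
    group=1,
    bottleneck_width=64,
    deep_stem=True,          # three 3x3 convs in the stem
    stem_width=32,
    avg_down=True,           # avg-pool in the projection shortcut
    avd=True,                # avg-pool downsampling inside the blocks
    avd_first=False,         # ...applied after the split-attention conv
    num_classes=1000,
)
```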
"},{"location":"reference/models/#mindcv.models.resnest.resnest101","title":"mindcv.models.resnest.resnest101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest101\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=64, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnest.resnest14","title":"mindcv.models.resnest.resnest14(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest14\"]\n    model_args = dict(block=Bottleneck, layers=[1, 1, 1, 1], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=32, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnest.resnest200","title":"mindcv.models.resnest.resnest200(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest200(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest200\"]\n    model_args = dict(block=Bottleneck, layers=[3, 24, 36, 3], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=64, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnest.resnest26","title":"mindcv.models.resnest.resnest26(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest26(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest26\"]\n    model_args = dict(block=Bottleneck, layers=[2, 2, 2, 2], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=32, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnest.resnest269","title":"mindcv.models.resnest.resnest269(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest269(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest269\"]\n    model_args = dict(block=Bottleneck, layers=[3, 30, 48, 8], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=64, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnest.resnest50","title":"mindcv.models.resnest.resnest50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnest.py
@register_model\ndef resnest50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnest50\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], radix=2, group=1,\n                      bottleneck_width=64, num_classes=num_classes,\n                      deep_stem=True, stem_width=32, avg_down=True,\n                      avd=True, avd_first=False, **kwargs)\n    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#resnet","title":"resnet","text":""},{"location":"reference/models/#mindcv.models.resnet.ResNet","title":"mindcv.models.resnet.ResNet","text":"

Bases: nn.Cell

ResNet model class, based on \"Deep Residual Learning for Image Recognition\" (https://arxiv.org/abs/1512.03385)

PARAMETER DESCRIPTION block

Block class of the ResNet (BasicBlock or Bottleneck).

TYPE: Type[Union[BasicBlock, Bottleneck]]

layers

Number of blocks in each of the four stages.

TYPE: List[int]

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

Number of input channels. Default: 3.

TYPE: int DEFAULT: 3

groups

number of groups for group conv in blocks. Default: 1.

TYPE: int DEFAULT: 1

base_width

Base width of the per-group hidden channels in blocks. Default: 64.

TYPE: int DEFAULT: 64

norm

normalization layer in blocks. Default: None.

TYPE: Optional[nn.Cell] DEFAULT: None

Source code in mindcv/models/resnet.py
class ResNet(nn.Cell):\nr\"\"\"ResNet model class, based on\n    `\"Deep Residual Learning for Image Recognition\" <https://arxiv.org/abs/1512.03385>`_\n\n    Args:\n        block: block of resnet.\n        layers: number of layers of each stage.\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number the channels of the input. Default: 3.\n        groups: number of groups for group conv in blocks. Default: 1.\n        base_width: base width of pre group hidden channel in blocks. Default: 64.\n        norm: normalization layer in blocks. Default: None.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[Union[BasicBlock, Bottleneck]],\n        layers: List[int],\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        groups: int = 1,\n        base_width: int = 64,\n        norm: Optional[nn.Cell] = None,\n    ) -> None:\n        super().__init__()\n        if norm is None:\n            norm = nn.BatchNorm2d\n\n        self.norm: nn.Cell = norm  # add type hints to make pylint happy\n        self.input_channels = 64\n        self.groups = groups\n        self.base_with = base_width\n\n        self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,\n                               stride=2, pad_mode=\"pad\", padding=3)\n        self.bn1 = norm(self.input_channels)\n        self.relu = nn.ReLU()\n        self.feature_info = [dict(chs=self.input_channels, reduction=2, name=\"relu\")]\n        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name=\"layer1\"))\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name=\"layer2\"))\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n        self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name=\"layer3\"))\n        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)\n        self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name=\"layer4\"))\n\n        self.pool = GlobalAvgPooling()\n        self.num_features = 512 * block.expansion\n        self.classifier = nn.Dense(self.num_features, num_classes)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n  
                  cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))\n\n    def _make_layer(\n        self,\n        block: Type[Union[BasicBlock, Bottleneck]],\n        channels: int,\n        block_nums: int,\n        stride: int = 1,\n    ) -> nn.SequentialCell:\n\"\"\"build model depending on cfgs\"\"\"\n        down_sample = None\n\n        if stride != 1 or self.input_channels != channels * block.expansion:\n            down_sample = nn.SequentialCell([\n                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),\n                self.norm(channels * block.expansion)\n            ])\n\n        layers = []\n        layers.append(\n            block(\n                self.input_channels,\n                channels,\n                stride=stride,\n                down_sample=down_sample,\n                groups=self.groups,\n                base_width=self.base_with,\n                norm=self.norm,\n            )\n        )\n        self.input_channels = channels * block.expansion\n\n        for _ in range(1, block_nums):\n            layers.append(\n                block(\n                    self.input_channels,\n                    channels,\n                    groups=self.groups,\n                    base_width=self.base_with,\n                    norm=self.norm\n                )\n            )\n\n        return nn.SequentialCell(layers)\n\n    def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.max_pool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
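A hedged sketch of using the class above as a feature backbone follows; the Bottleneck import path, the 224x224 input and the printed shapes are assumptions based on the listing (Bottleneck.expansion = 4, overall stride 32).

```python
# Hedged sketch: resnet50-style layout used as a feature extractor.
import numpy as np
import mindspore as ms

from mindcv.models.resnet import ResNet, Bottleneck

backbone = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000)  # resnet50 configuration

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
feats = backbone.forward_features(x)   # (1, 2048, 7, 7): 512 * Bottleneck.expansion channels
logits = backbone.forward_head(feats)  # global average pooling + Dense -> (1, 1000)
```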
"},{"location":"reference/models/#mindcv.models.resnet.ResNet.forward_features","title":"mindcv.models.resnet.ResNet.forward_features(x)","text":"

Network forward feature extraction.

Source code in mindcv/models/resnet.py
def forward_features(self, x: Tensor) -> Tensor:\n\"\"\"Network forward feature extraction.\"\"\"\n    x = self.conv1(x)\n    x = self.bn1(x)\n    x = self.relu(x)\n    x = self.max_pool(x)\n\n    x = self.layer1(x)\n    x = self.layer2(x)\n    x = self.layer3(x)\n    x = self.layer4(x)\n    return x\n
"},{"location":"reference/models/#mindcv.models.resnet.resnet101","title":"mindcv.models.resnet.resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 101-layer ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 101 layers ResNet model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnet101\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels,\n                      **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnet152","title":"mindcv.models.resnet.resnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 152-layer ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 152 layers ResNet model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnet152\"]\n    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels,\n                      **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnet18","title":"mindcv.models.resnet.resnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get an 18-layer ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 18 layers ResNet model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnet18\"]\n    model_args = dict(block=BasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,\n                      **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnet34","title":"mindcv.models.resnet.resnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 34-layer ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 34 layers ResNet model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnet34\"]\n    model_args = dict(block=BasicBlock, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,\n                      **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnet50","title":"mindcv.models.resnet.resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 50 layers ResNet model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnet50\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,\n                      **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnext101_32x4d","title":"mindcv.models.resnet.resnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 101-layer ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 101 layers ResNeXt model with 32 groups of GPConv.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnext101_32x4d\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=32, base_width=4, num_classes=num_classes,\n                      in_channels=in_channels, **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnext101_64x4d","title":"mindcv.models.resnet.resnext101_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 101-layer ResNeXt model with 64 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnext101_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 101 layers ResNeXt model with 64 groups of GPConv.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnext101_64x4d\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=64, base_width=4, num_classes=num_classes,\n                      in_channels=in_channels, **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnext152_64x4d","title":"mindcv.models.resnet.resnext152_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/resnet.py
@register_model\ndef resnext152_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"resnext152_64x4d\"]\n    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], groups=64, base_width=4, num_classes=num_classes,\n                      in_channels=in_channels, **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
"},{"location":"reference/models/#mindcv.models.resnet.resnext50_32x4d","title":"mindcv.models.resnet.resnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py
@register_model\ndef resnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 50 layers ResNeXt model with 32 groups of GPConv.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnext50_32x4d\"]\n    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], groups=32, base_width=4, num_classes=num_classes,\n                      in_channels=in_channels, **kwargs)\n    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))\n
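The ResNeXt factories above reuse the ResNet class and differ only in the grouping arguments. A hedged sketch of the correspondence (the Bottleneck import path is assumed from the listing):

```python
# Hedged sketch: resnext50_32x4d is ResNet with grouped 3x3 convolutions.
from mindcv.models.resnet import ResNet, Bottleneck

resnext50_like = ResNet(
    Bottleneck, [3, 4, 6, 3],
    groups=32,        # "32x": 32 groups in the group conv
    base_width=4,     # "4d": 4 channels per group
    num_classes=1000,
)
```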
"},{"location":"reference/models/#resnetv2","title":"resnetv2","text":""},{"location":"reference/models/#mindcv.models.resnetv2.resnetv2_101","title":"mindcv.models.resnetv2.resnetv2_101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 101-layer ResNetV2 model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnetv2.py
@register_model\ndef resnetv2_101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 101 layers ResNetV2 model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"resnetv2_101\"]\n    model = ResNet(PreActBottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.resnetv2.resnetv2_50","title":"mindcv.models.resnetv2.resnetv2_50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer ResNetV2 model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnetv2.py
@register_model\ndef resnetv2_50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"Get 50 layers ResNetV2 model.\n    Refer to the base class `models.ResNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs['resnetv2_50']\n    model = ResNet(PreActBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
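As the two listings above show, both ResNetV2 factories wrap the ResNet backbone with the pre-activation bottleneck from mindcv/models/resnetv2.py; a minimal creation sketch:

```python
# Minimal sketch using the factories documented above.
from mindcv.models.resnetv2 import resnetv2_50, resnetv2_101

net50 = resnetv2_50(pretrained=False, num_classes=1000)    # layers [3, 4, 6, 3]
net101 = resnetv2_101(pretrained=False, num_classes=1000)  # layers [3, 4, 23, 3]
```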
"},{"location":"reference/models/#rexnet","title":"rexnet","text":""},{"location":"reference/models/#mindcv.models.rexnet.ReXNetV1","title":"mindcv.models.rexnet.ReXNetV1","text":"

Bases: nn.Cell

ReXNet model class, based on \"Rethinking Channel Dimensions for Efficient Model Design\" (https://arxiv.org/abs/2007.00992)

PARAMETER DESCRIPTION in_channels

number of the input channels. Default: 3.

TYPE: int DEFAULT: 3

fi_channels

number of the final channels. Default: 180.

TYPE: int DEFAULT: 180

initial_channels

Initial number of inplanes channels. Default: 16.

TYPE: int DEFAULT: 16

width_mult

Width multiplier scaling the number of channels. Default: 1.0.

TYPE: float DEFAULT: 1.0

depth_mult

Depth multiplier scaling the number of layers. Default: 1.0.

TYPE: float DEFAULT: 1.0

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

use_se

Use the SE module in LinearBottleneck. Default: True.

TYPE: bool DEFAULT: True

se_ratio

SENet reduction ratio. Default: 1/12.

TYPE: float DEFAULT: 1 / 12

drop_rate

dropout ratio. Default: 0.2.

TYPE: float DEFAULT: 0.2

ch_div

Channel divisor; channel counts are rounded to be divisible by ch_div. Default: 1.

TYPE: int DEFAULT: 1

act_layer

activation function in ConvNormAct. Default: nn.SiLU.

TYPE: nn.Cell DEFAULT: nn.SiLU

dw_act_layer

activation function after dw_conv. Default: nn.ReLU6.

TYPE: nn.Cell DEFAULT: nn.ReLU6

cls_useconv

Use a conv layer instead of a Dense layer in the classification head. Default: False.

TYPE: bool DEFAULT: False

Source code in mindcv/models/rexnet.py
class ReXNetV1(nn.Cell):\nr\"\"\"ReXNet model class, based on\n    `\"Rethinking Channel Dimensions for Efficient Model Design\" <https://arxiv.org/abs/2007.00992>`_\n\n    Args:\n        in_channels (int): number of the input channels. Default: 3.\n        fi_channels (int): number of the final channels. Default: 180.\n        initial_channels (int): initialize inplanes. Default: 16.\n        width_mult (float): The ratio of the channel. Default: 1.0.\n        depth_mult (float): The ratio of num_layers. Default: 1.0.\n        num_classes (int) : number of classification classes. Default: 1000.\n        use_se (bool): use SENet in LinearBottleneck. Default: True.\n        se_ratio: (float): SENet reduction ratio. Default 1/12.\n        drop_rate (float): dropout ratio. Default: 0.2.\n        ch_div (int): divisible by ch_div. Default: 1.\n        act_layer (nn.Cell): activation function in ConvNormAct. Default: nn.SiLU.\n        dw_act_layer (nn.Cell): activation function after dw_conv. Default: nn.ReLU6.\n        cls_useconv (bool): use conv in classification. Default: False.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels=3,\n        fi_channels=180,\n        initial_channels=16,\n        width_mult=1.0,\n        depth_mult=1.0,\n        num_classes=1000,\n        use_se=True,\n        se_ratio=1 / 12,\n        drop_rate=0.2,\n        drop_path_rate=0.0,\n        ch_div=1,\n        act_layer=nn.SiLU,\n        dw_act_layer=nn.ReLU6,\n        cls_useconv=False,\n    ):\n        super(ReXNetV1, self).__init__()\n\n        layers = [1, 2, 2, 3, 3, 5]\n        strides = [1, 2, 2, 2, 1, 2]\n        use_ses = [False, False, True, True, True, True]\n\n        layers = [ceil(element * depth_mult) for element in layers]\n        strides = sum([[element] + [1] * (layers[idx] - 1)\n                       for idx, element in enumerate(strides)], [])\n        if use_se:\n            use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])\n        else:\n            use_ses = [False] * sum(layers[:])\n        exp_ratios = [1] * layers[0] + [6] * sum(layers[1:])\n\n        self.depth = sum(layers[:]) * 3\n        stem_channel = 32 / width_mult if width_mult < 1.0 else 32\n        inplanes = initial_channels / width_mult if width_mult < 1.0 else initial_channels\n\n        features = []\n        in_channels_group = []\n        out_channels_group = []\n\n        for i in range(self.depth // 3):\n            if i == 0:\n                in_channels_group.append(int(round(stem_channel * width_mult)))\n                out_channels_group.append(int(round(inplanes * width_mult)))\n            else:\n                in_channels_group.append(int(round(inplanes * width_mult)))\n                inplanes += fi_channels / (self.depth // 3 * 1.0)\n                out_channels_group.append(int(round(inplanes * width_mult)))\n\n        stem_chs = make_divisible(round(stem_channel * width_mult), divisor=ch_div)\n        self.stem = Conv2dNormActivation(in_channels, stem_chs, stride=2, padding=1, activation=act_layer)\n\n        feat_chs = [stem_chs]\n        self.feature_info = []\n        curr_stride = 2\n        features = []\n        num_blocks = len(in_channels_group)\n        for block_idx, (in_c, out_c, exp_ratio, stride, use_se) in enumerate(\n            zip(in_channels_group, out_channels_group, exp_ratios, strides, use_ses)\n        ):\n            if stride > 1:\n                fname = \"stem\" if block_idx == 0 else f\"features.{block_idx - 1}\"\n             
   self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=fname)]\n            block_dpr = drop_path_rate * block_idx / (num_blocks - 1)  # stochastic depth linear decay rule\n            drop_path = DropPath(block_dpr) if block_dpr > 0. else None\n            features.append(LinearBottleneck(in_channels=in_c,\n                                             out_channels=out_c,\n                                             exp_ratio=exp_ratio,\n                                             stride=stride,\n                                             use_se=use_se,\n                                             se_ratio=se_ratio,\n                                             act_layer=act_layer,\n                                             dw_act_layer=dw_act_layer,\n                                             drop_path=drop_path))\n            curr_stride *= stride\n            feat_chs.append(out_c)\n\n        pen_channels = make_divisible(int(1280 * width_mult), divisor=ch_div)\n        self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=f'features.{len(features) - 1}')]\n        self.flatten_sequential = True\n        features.append(Conv2dNormActivation(out_channels_group[-1],\n                                             pen_channels,\n                                             kernel_size=1,\n                                             activation=act_layer))\n\n        features.append(GlobalAvgPooling(keep_dims=True))\n        self.useconv = cls_useconv\n        self.features = nn.SequentialCell(*features)\n        if self.useconv:\n            self.cls = nn.SequentialCell(\n                Dropout(p=drop_rate),\n                nn.Conv2d(pen_channels, num_classes, 1, has_bias=True))\n        else:\n            self.cls = nn.SequentialCell(\n                Dropout(p=drop_rate),\n                nn.Dense(pen_channels, num_classes))\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, (nn.Conv2d, nn.Dense)):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(init.HeUniform(math.sqrt(5), mode=\"fan_in\", nonlinearity=\"leaky_relu\"),\n                                         [1, cell.bias.shape[0]], cell.bias.dtype).reshape((-1)))\n\n    def forward_features(self, x):\n        x = self.stem(x)\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x):\n        if not self.useconv:\n            x = x.reshape((x.shape[0], -1))\n            x = self.cls(x)\n        else:\n            x = self.cls(x).reshape((x.shape[0], -1))\n        return x\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
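The rexnet_* factories listed below differ only in the width multiplier passed to this class; a hedged sketch of configuring it directly (values here mirror rexnet_15, and the import path is taken from the source listing):

```python
# Hedged sketch: direct ReXNetV1 configuration.
from mindcv.models.rexnet import ReXNetV1

net = ReXNetV1(
    width_mult=1.5,   # scales every channel count (the "rexnet_15" recipe)
    depth_mult=1.0,   # keeps the base per-stage layer counts [1, 2, 2, 3, 3, 5]
    num_classes=1000,
    use_se=True,      # SE module inside each LinearBottleneck
    drop_rate=0.2,    # dropout before the classifier
)
```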
"},{"location":"reference/models/#mindcv.models.rexnet.rexnet_09","title":"mindcv.models.rexnet.rexnet_09(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a ReXNet model with a width multiplier of 0.9. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py
@register_model\ndef rexnet_09(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:\n\"\"\"Get ReXNet model with width multiplier of 0.9.\n    Refer to the base class `models.ReXNetV1` for more details.\n    \"\"\"\n    return _rexnet(\"rexnet_09\", 0.9, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.rexnet.rexnet_10","title":"mindcv.models.rexnet.rexnet_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a ReXNet model with a width multiplier of 1.0. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py
@register_model\ndef rexnet_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:\n\"\"\"Get ReXNet model with width multiplier of 1.0.\n    Refer to the base class `models.ReXNetV1` for more details.\n    \"\"\"\n    return _rexnet(\"rexnet_10\", 1.0, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.rexnet.rexnet_13","title":"mindcv.models.rexnet.rexnet_13(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a ReXNet model with a width multiplier of 1.3. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py
@register_model\ndef rexnet_13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:\n\"\"\"Get ReXNet model with width multiplier of 1.3.\n    Refer to the base class `models.ReXNetV1` for more details.\n    \"\"\"\n    return _rexnet(\"rexnet_13\", 1.3, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.rexnet.rexnet_15","title":"mindcv.models.rexnet.rexnet_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a ReXNet model with a width multiplier of 1.5. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py
@register_model\ndef rexnet_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:\n\"\"\"Get ReXNet model with width multiplier of 1.5.\n    Refer to the base class `models.ReXNetV1` for more details.\n    \"\"\"\n    return _rexnet(\"rexnet_15\", 1.5, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#mindcv.models.rexnet.rexnet_20","title":"mindcv.models.rexnet.rexnet_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a ReXNet model with a width multiplier of 2.0. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py
@register_model\ndef rexnet_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:\n\"\"\"Get ReXNet model with width multiplier of 2.0.\n    Refer to the base class `models.ReXNetV1` for more details.\n    \"\"\"\n    return _rexnet(\"rexnet_20\", 2.0, in_channels, num_classes, pretrained, **kwargs)\n
"},{"location":"reference/models/#senet","title":"senet","text":""},{"location":"reference/models/#mindcv.models.senet.SENet","title":"mindcv.models.senet.SENet","text":"

Bases: nn.Cell

SENet model class, based on \"Squeeze-and-Excitation Networks\" (https://arxiv.org/abs/1709.01507)

PARAMETER DESCRIPTION block

block class of SENet.

TYPE: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]]

layers

Number of residual blocks in each of the four layers.

TYPE: List[int]

group

Number of groups for the conv in each bottleneck block.

TYPE: int

reduction

Reduction ratio for Squeeze-and-Excitation modules.

TYPE: int

drop_rate

Drop probability for the Dropout layer. Default: 0.

TYPE: float DEFAULT: 0.0

in_channels

Number of input channels. Default: 3.

TYPE: int DEFAULT: 3

inplanes

Number of input channels for layer1. Default: 64.

TYPE: int DEFAULT: 64

input3x3

If True, use three 3x3 convolutions in layer0. Default: False.

TYPE: bool DEFAULT: False

downsample_kernel_size

Kernel size for downsampling convolutions. Default: 1.

TYPE: int DEFAULT: 1

downsample_padding

Padding for downsampling convolutions. Default: 0.

TYPE: int DEFAULT: 0

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

Source code in mindcv/models/senet.py
class SENet(nn.Cell):\nr\"\"\"SENet model class, based on\n    `\"Squeeze-and-Excitation Networks\" <https://arxiv.org/abs/1709.01507>`_\n\n    Args:\n        block: block class of SENet.\n        layers: Number of residual blocks for 4 layers.\n        group: Number of groups for the conv in each bottleneck block.\n        reduction: Reduction ratio for Squeeze-and-Excitation modules.\n        drop_rate: Drop probability for the Dropout layer. Default: 0.\n        in_channels: number the channels of the input. Default: 3.\n        inplanes:  Number of input channels for layer1. Default: 64.\n        input3x3: If `True`, use three 3x3 convolutions in layer0. Default: False.\n        downsample_kernel_size: Kernel size for downsampling convolutions. Default: 1.\n        downsample_padding: Padding for downsampling convolutions. Default: 0.\n        num_classes (int): number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],\n        layers: List[int],\n        group: int,\n        reduction: int,\n        drop_rate: float = 0.0,\n        in_channels: int = 3,\n        inplanes: int = 64,\n        input3x3: bool = False,\n        downsample_kernel_size: int = 1,\n        downsample_padding: int = 0,\n        num_classes: int = 1000,\n    ) -> None:\n        super(SENet, self).__init__()\n        self.inplanes = inplanes\n        self.num_classes = num_classes\n        self.drop_rate = drop_rate\n        if input3x3:\n            self.layer0 = nn.SequentialCell([\n                nn.Conv2d(in_channels, 64, 3, stride=2, pad_mode=\"pad\", padding=1, has_bias=False),\n                nn.BatchNorm2d(64),\n                nn.ReLU(),\n                nn.Conv2d(64, 64, 3, stride=1, pad_mode=\"pad\", padding=1, has_bias=False),\n                nn.BatchNorm2d(64),\n                nn.ReLU(),\n                nn.Conv2d(64, inplanes, 3, stride=1, pad_mode=\"pad\", padding=1, has_bias=False),\n                nn.BatchNorm2d(inplanes),\n                nn.ReLU()\n            ])\n        else:\n            self.layer0 = nn.SequentialCell([\n                nn.Conv2d(in_channels, inplanes, kernel_size=7, stride=2, pad_mode=\"pad\",\n                          padding=3, has_bias=False),\n                nn.BatchNorm2d(inplanes),\n                nn.ReLU()\n            ])\n        self.pool0 = nn.MaxPool2d(3, stride=2, pad_mode=\"same\")\n\n        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], group=group,\n                                       reduction=reduction, downsample_kernel_size=1,\n                                       downsample_padding=0)\n\n        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2,\n                                       group=group, reduction=reduction,\n                                       downsample_kernel_size=downsample_kernel_size,\n                                       downsample_padding=downsample_padding)\n\n        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2,\n                                       group=group, reduction=reduction,\n                                       downsample_kernel_size=downsample_kernel_size,\n                                       downsample_padding=downsample_padding)\n\n        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2,\n                                       group=group, 
reduction=reduction,\n                                       downsample_kernel_size=downsample_kernel_size,\n                                       downsample_padding=downsample_padding)\n\n        self.num_features = 512 * block.expansion\n\n        self.pool = GlobalAvgPooling()\n        if self.drop_rate > 0.:\n            self.dropout = Dropout(p=self.drop_rate)\n        self.classifier = nn.Dense(self.num_features, self.num_classes)\n\n        self._initialize_weights()\n\n    def _make_layer(\n        self,\n        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],\n        planes: int,\n        blocks: int,\n        group: int,\n        reduction: int,\n        stride: int = 1,\n        downsample_kernel_size: int = 1,\n        downsample_padding: int = 0,\n    ) -> nn.SequentialCell:\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.SequentialCell([\n                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size,\n                          stride=stride, pad_mode=\"pad\", padding=downsample_padding, has_bias=False),\n                nn.BatchNorm2d(planes * block.expansion)\n            ])\n\n        layers = [block(self.inplanes, planes, group, reduction, stride, downsample)]\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes, group, reduction))\n\n        return nn.SequentialCell(layers)\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.HeUniform(mode=\"fan_in\", nonlinearity=\"sigmoid\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.layer0(x)\n        x = self.pool0(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        if self.drop_rate > 0.0:\n            x = self.dropout(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
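The se* factories that follow vary only the block type, stage layout and grouping passed to this class. A hedged sketch of an SE-ResNeXt-101-style network built directly (block import path assumed from the type hints in the listing above):

```python
# Hedged sketch: direct SENet construction with the seresnext101_32x4d recipe.
from mindcv.models.senet import SENet, SEResNeXtBottleneck

net = SENet(
    block=SEResNeXtBottleneck,
    layers=[3, 4, 23, 3],   # seresnext101_32x4d stage layout
    group=32,               # 32-group conv in each bottleneck
    reduction=16,           # squeeze-and-excitation reduction ratio
    num_classes=1000,
)
```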
"},{"location":"reference/models/#mindcv.models.senet.senet154","title":"mindcv.models.senet.senet154(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef senet154(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"senet154\"]\n    model = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], group=64, reduction=16,\n                  downsample_kernel_size=3, downsample_padding=1,  inplanes=128, input3x3=True,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnet101","title":"mindcv.models.senet.seresnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnet101\"]\n    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], group=1, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnet152","title":"mindcv.models.senet.seresnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnet152\"]\n    model = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], group=1, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnet18","title":"mindcv.models.senet.seresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnet18\"]\n    model = SENet(block=SEResNetBlock, layers=[2, 2, 2, 2], group=1, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnet34","title":"mindcv.models.senet.seresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnet34\"]\n    model = SENet(block=SEResNetBlock, layers=[3, 4, 6, 3], group=1, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnet50","title":"mindcv.models.senet.seresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnet50\"]\n    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnext101_32x4d","title":"mindcv.models.senet.seresnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnext101_32x4d\"]\n    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], group=32, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnext26_32x4d","title":"mindcv.models.senet.seresnext26_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnext26_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnext26_32x4d\"]\n    model = SENet(block=SEResNeXtBottleneck, layers=[2, 2, 2, 2], group=32, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.senet.seresnext50_32x4d","title":"mindcv.models.senet.seresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/senet.py
@register_model\ndef seresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"seresnext50_32x4d\"]\n    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], group=32, reduction=16,\n                  num_classes=num_classes, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#shufflenetv1","title":"shufflenetv1","text":""},{"location":"reference/models/#mindcv.models.shufflenetv1.ShuffleNetV1","title":"mindcv.models.shufflenetv1.ShuffleNetV1","text":"

Bases: nn.Cell

ShuffleNetV1 model class, based on \"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices\" <https://arxiv.org/abs/1707.01083>_

PARAMETER DESCRIPTION num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of input channels. Default: 3.

TYPE: int DEFAULT: 3

model_size

scale factor which controls the number of channels. Default: '2.0x'.

TYPE: str DEFAULT: '2.0x'

group

number of groups for group convolution. Default: 3.

TYPE: int DEFAULT: 3

Source code in mindcv/models/shufflenetv1.py
class ShuffleNetV1(nn.Cell):\nr\"\"\"ShuffleNetV1 model class, based on\n    `\"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices\" <https://arxiv.org/abs/1707.01083>`_  # noqa: E501\n\n    Args:\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number of input channels. Default: 3.\n        model_size: scale factor which controls the number of channels. Default: '2.0x'.\n        group: number of group for group convolution. Default: 3.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        model_size: str = \"2.0x\",\n        group: int = 3,\n    ):\n        super().__init__()\n        self.stage_repeats = [4, 8, 4]\n        self.model_size = model_size\n        if group == 3:\n            if model_size == \"0.5x\":\n                self.stage_out_channels = [-1, 12, 120, 240, 480]\n            elif model_size == \"1.0x\":\n                self.stage_out_channels = [-1, 24, 240, 480, 960]\n            elif model_size == \"1.5x\":\n                self.stage_out_channels = [-1, 24, 360, 720, 1440]\n            elif model_size == \"2.0x\":\n                self.stage_out_channels = [-1, 48, 480, 960, 1920]\n            else:\n                raise NotImplementedError\n        elif group == 8:\n            if model_size == \"0.5x\":\n                self.stage_out_channels = [-1, 16, 192, 384, 768]\n            elif model_size == \"1.0x\":\n                self.stage_out_channels = [-1, 24, 384, 768, 1536]\n            elif model_size == \"1.5x\":\n                self.stage_out_channels = [-1, 24, 576, 1152, 2304]\n            elif model_size == \"2.0x\":\n                self.stage_out_channels = [-1, 48, 768, 1536, 3072]\n            else:\n                raise NotImplementedError\n\n        # building first layer\n        input_channel = self.stage_out_channels[1]\n        self.first_conv = nn.SequentialCell(\n            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2, pad_mode=\"pad\", padding=1),\n            nn.BatchNorm2d(input_channel),\n            nn.ReLU(),\n        )\n        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n\n        features = []\n        for idxstage, numrepeat in enumerate(self.stage_repeats):\n            output_channel = self.stage_out_channels[idxstage + 2]\n            for i in range(numrepeat):\n                stride = 2 if i == 0 else 1\n                first_group = idxstage == 0 and i == 0\n                features.append(ShuffleV1Block(input_channel, output_channel,\n                                               group=group, first_group=first_group,\n                                               mid_channels=output_channel // 4, stride=stride))\n                input_channel = output_channel\n\n        self.features = nn.SequentialCell(features)\n        self.global_pool = GlobalAvgPooling()\n        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        for name, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                if \"first\" in name:\n                    cell.weight.set_data(\n                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))\n                else:\n                    cell.weight.set_data(\n                        
init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,\n                                         cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.first_conv(x)\n        x = self.max_pool(x)\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.global_pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
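A short sketch of how the group and model_size options described above fit together (hedged; the import path is assumed from the source listing). Only the (group, model_size) pairs listed in __init__ have a stage channel table, so other widths raise NotImplementedError.

import numpy as np
import mindspore as ms
from mindcv.models.shufflenetv1 import ShuffleNetV1

# group and model_size jointly select the stage_out_channels table above.
net = ShuffleNetV1(num_classes=1000, in_channels=3, model_size="1.0x", group=3)

x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)  # shape (1, 1000)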
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g3_05","title":"mindcv.models.shufflenetv1.shufflenet_v1_g3_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g3_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g3_05\"]\n    model = ShuffleNetV1(group=3, model_size=\"0.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
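Since each factory on this page is wrapped with @register_model, the same network can usually be created by its registry name through mindcv's create_model helper; a hedged sketch (keyword names follow mindcv's top-level API, not this listing):

import mindcv

# Create the registered model by name; pretrained=True would also load the
# default checkpoint referenced by default_cfgs.
net = mindcv.create_model("shufflenet_v1_g3_05", num_classes=1000, pretrained=False)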
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g3_10","title":"mindcv.models.shufflenetv1.shufflenet_v1_g3_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g3_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g3_10\"]\n    model = ShuffleNetV1(group=3, model_size=\"1.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g3_15","title":"mindcv.models.shufflenetv1.shufflenet_v1_g3_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g3_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g3_15\"]\n    model = ShuffleNetV1(group=3, model_size=\"1.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g3_20","title":"mindcv.models.shufflenetv1.shufflenet_v1_g3_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g3_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g3_20\"]\n    model = ShuffleNetV1(group=3, model_size=\"2.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g8_05","title":"mindcv.models.shufflenetv1.shufflenet_v1_g8_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g8_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g8_05\"]\n    model = ShuffleNetV1(group=8, model_size=\"0.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g8_10","title":"mindcv.models.shufflenetv1.shufflenet_v1_g8_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g8_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g8_10\"]\n    model = ShuffleNetV1(group=8, model_size=\"1.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g8_15","title":"mindcv.models.shufflenetv1.shufflenet_v1_g8_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g8_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g8_15\"]\n    model = ShuffleNetV1(group=8, model_size=\"1.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv1.shufflenet_v1_g8_20","title":"mindcv.models.shufflenetv1.shufflenet_v1_g8_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

Source code in mindcv/models/shufflenetv1.py
@register_model\ndef shufflenet_v1_g8_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:\n\"\"\"Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv.\n    Refer to the base class `models.ShuffleNetV1` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v1_g8_20\"]\n    model = ShuffleNetV1(group=8, model_size=\"2.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#shufflenetv2","title":"shufflenetv2","text":""},{"location":"reference/models/#mindcv.models.shufflenetv2.ShuffleNetV2","title":"mindcv.models.shufflenetv2.ShuffleNetV2","text":"

Bases: nn.Cell

ShuffleNetV2 model class, based on \"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\" <https://arxiv.org/abs/1807.11164>_

PARAMETER DESCRIPTION num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of input channels. Default: 3.

TYPE: int DEFAULT: 3

model_size

scale factor which controls the number of channels. Default: '1.5x'.

TYPE: str DEFAULT: '1.5x'

Source code in mindcv/models/shufflenetv2.py
class ShuffleNetV2(nn.Cell):\nr\"\"\"ShuffleNetV2 model class, based on\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\" <https://arxiv.org/abs/1807.11164>`_\n\n    Args:\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number of input channels. Default: 3.\n        model_size: scale factor which controls the number of channels. Default: '1.5x'.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        model_size: str = \"1.5x\",\n    ):\n        super().__init__()\n\n        self.stage_repeats = [4, 8, 4]\n        self.model_size = model_size\n        if model_size == \"0.5x\":\n            self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]\n        elif model_size == \"1.0x\":\n            self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]\n        elif model_size == \"1.5x\":\n            self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]\n        elif model_size == \"2.0x\":\n            self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]\n        else:\n            raise NotImplementedError\n\n        # building first layer\n        input_channel = self.stage_out_channels[1]\n        self.first_conv = nn.SequentialCell([\n            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2,\n                      pad_mode=\"pad\", padding=1),\n            nn.BatchNorm2d(input_channel),\n            nn.ReLU(),\n        ])\n        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n\n        self.features = []\n        for idxstage, numrepeat in enumerate(self.stage_repeats):\n            output_channel = self.stage_out_channels[idxstage + 2]\n            for i in range(numrepeat):\n                if i == 0:\n                    self.features.append(ShuffleV2Block(input_channel, output_channel,\n                                                        mid_channels=output_channel // 2, kernel_size=3, stride=2))\n                else:\n                    self.features.append(ShuffleV2Block(input_channel // 2, output_channel,\n                                                        mid_channels=output_channel // 2, kernel_size=3, stride=1))\n                input_channel = output_channel\n\n        self.features = nn.SequentialCell(self.features)\n\n        self.conv_last = nn.SequentialCell([\n            nn.Conv2d(input_channel, self.stage_out_channels[-1], kernel_size=1, stride=1),\n            nn.BatchNorm2d(self.stage_out_channels[-1]),\n            nn.ReLU()\n        ])\n        self.pool = GlobalAvgPooling()\n        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        for name, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                if \"first\" in name:\n                    cell.weight.set_data(\n                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))\n                else:\n                    cell.weight.set_data(\n                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,\n                                         cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n         
   elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.first_conv(x)\n        x = self.max_pool(x)\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.conv_last(x)\n        x = self.pool(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
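A small sketch of the model_size option described above (hedged; import path assumed from the source listing). The last entry of the stage table in __init__ fixes the classifier input width: the '0.5x', '1.0x' and '1.5x' variants end in 1024 channels, '2.0x' in 2048.

from mindcv.models.shufflenetv2 import ShuffleNetV2

# The classifier is Dense(stage_out_channels[-1], num_classes).
small = ShuffleNetV2(model_size="0.5x")  # final feature width 1024
large = ShuffleNetV2(model_size="2.0x")  # final feature width 2048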
"},{"location":"reference/models/#mindcv.models.shufflenetv2.shufflenet_v2_x0_5","title":"mindcv.models.shufflenetv2.shufflenet_v2_x0_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV2 model with width scaled by 0.5. Refer to the base class models.ShuffleNetV2 for more details.

Source code in mindcv/models/shufflenetv2.py
@register_model\ndef shufflenet_v2_x0_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:\n\"\"\"Get ShuffleNetV2 model with width scaled by 0.5.\n    Refer to the base class `models.ShuffleNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v2_x0_5\"]\n    model = ShuffleNetV2(model_size=\"0.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv2.shufflenet_v2_x1_0","title":"mindcv.models.shufflenetv2.shufflenet_v2_x1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV2 model with width scaled by 1.0. Refer to the base class models.ShuffleNetV2 for more details.

Source code in mindcv/models/shufflenetv2.py
@register_model\ndef shufflenet_v2_x1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:\n\"\"\"Get ShuffleNetV2 model with width scaled by 1.0.\n    Refer to the base class `models.ShuffleNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v2_x1_0\"]\n    model = ShuffleNetV2(model_size=\"1.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv2.shufflenet_v2_x1_5","title":"mindcv.models.shufflenetv2.shufflenet_v2_x1_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV2 model with width scaled by 1.5. Refer to the base class models.ShuffleNetV2 for more details.

Source code in mindcv/models/shufflenetv2.py
@register_model\ndef shufflenet_v2_x1_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:\n\"\"\"Get ShuffleNetV2 model with width scaled by 1.5.\n    Refer to the base class `models.ShuffleNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v2_x1_5\"]\n    model = ShuffleNetV2(model_size=\"1.5x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.shufflenetv2.shufflenet_v2_x2_0","title":"mindcv.models.shufflenetv2.shufflenet_v2_x2_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get ShuffleNetV2 model with width scaled by 2.0. Refer to the base class models.ShuffleNetV2 for more details.

Source code in mindcv/models/shufflenetv2.py
@register_model\ndef shufflenet_v2_x2_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:\n\"\"\"Get ShuffleNetV2 model with width scaled by 2.0.\n    Refer to the base class `models.ShuffleNetV2` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"shufflenet_v2_x2_0\"]\n    model = ShuffleNetV2(model_size=\"2.0x\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#sknet","title":"sknet","text":""},{"location":"reference/models/#mindcv.models.sknet.SKNet","title":"mindcv.models.sknet.SKNet","text":"

Bases: ResNet

SKNet model class, based on \"Selective Kernel Networks\" <https://arxiv.org/abs/1903.06586>_

PARAMETER DESCRIPTION block

block of SKNet.

TYPE: Type[nn.Cell]

layers

number of layers of each stage.

TYPE: List[int]

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

groups

number of groups for group conv in blocks. Default: 1.

TYPE: int DEFAULT: 1

base_width

base width of per-group hidden channels in blocks. Default: 64.

TYPE: int DEFAULT: 64

norm

normalization layer in blocks. Default: None.

TYPE: Optional[nn.Cell] DEFAULT: None

sk_kwargs

kwargs of selective kernel. Default: None.

TYPE: Optional[Dict] DEFAULT: None

Source code in mindcv/models/sknet.py
class SKNet(ResNet):\nr\"\"\"SKNet model class, based on\n    `\"Selective Kernel Networks\" <https://arxiv.org/abs/1903.06586>`_\n\n    Args:\n        block: block of sknet.\n        layers: number of layers of each stage.\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number the channels of the input. Default: 3.\n        groups: number of groups for group conv in blocks. Default: 1.\n        base_width: base width of pre group hidden channel in blocks. Default: 64.\n        norm: normalization layer in blocks. Default: None.\n        sk_kwargs: kwargs of selective kernel. Default: None.\n    \"\"\"\n\n    def __init__(\n        self,\n        block: Type[nn.Cell],\n        layers: List[int],\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        groups: int = 1,\n        base_width: int = 64,\n        norm: Optional[nn.Cell] = None,\n        sk_kwargs: Optional[Dict] = None,\n    ) -> None:\n        self.sk_kwargs: Optional[Dict] = sk_kwargs  # make pylint happy\n        super().__init__(block, layers, num_classes, in_channels, groups, base_width, norm)\n\n    def _make_layer(\n        self,\n        block: Type[Union[SelectiveKernelBasic, SelectiveKernelBottleneck]],\n        channels: int,\n        block_nums: int,\n        stride: int = 1,\n    ) -> nn.SequentialCell:\n        down_sample = None\n\n        if stride != 1 or self.input_channels != channels * block.expansion:\n            down_sample = nn.SequentialCell([\n                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),\n                self.norm(channels * block.expansion)\n            ])\n\n        layers = []\n        layers.append(\n            block(\n                self.input_channels,\n                channels,\n                stride=stride,\n                down_sample=down_sample,\n                groups=self.groups,\n                base_width=self.base_with,\n                norm=self.norm,\n                sk_kwargs=self.sk_kwargs,\n            )\n        )\n        self.input_channels = channels * block.expansion\n\n        for _ in range(1, block_nums):\n            layers.append(\n                block(\n                    self.input_channels,\n                    channels,\n                    groups=self.groups,\n                    base_width=self.base_with,\n                    norm=self.norm,\n                    sk_kwargs=self.sk_kwargs,\n                )\n            )\n\n        return nn.SequentialCell(layers)\n
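A hedged construction sketch for the class above; it mirrors the skresnet18 factory further down the page and assumes SelectiveKernelBasic is importable from the same module:

from mindcv.models.sknet import SKNet, SelectiveKernelBasic

# sk_kwargs is forwarded to the selective-kernel blocks of every stage.
net = SKNet(
    SelectiveKernelBasic,
    [2, 2, 2, 2],
    num_classes=1000,
    in_channels=3,
    sk_kwargs=dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True),
)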
"},{"location":"reference/models/#mindcv.models.sknet.skresnet18","title":"mindcv.models.sknet.skresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get an 18-layer SKNet model. Refer to the base class models.SKNet for more details.

Source code in mindcv/models/sknet.py
@register_model\ndef skresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:\n\"\"\"Get 18 layers SKNet model.\n    Refer to the base class `models.SKNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"skresnet18\"]\n    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)\n    model = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,\n                  sk_kwargs=sk_kwargs, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.sknet.skresnet34","title":"mindcv.models.sknet.skresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 34-layer SKNet model. Refer to the base class models.SKNet for more details.

Source code in mindcv/models/sknet.py
@register_model\ndef skresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:\n\"\"\"Get 34 layers SKNet model.\n    Refer to the base class `models.SKNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"skresnet34\"]\n    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)\n    model = SKNet(SelectiveKernelBasic, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,\n                  sk_kwargs=sk_kwargs, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.sknet.skresnet50","title":"mindcv.models.sknet.skresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer SKNet model. Refer to the base class models.SKNet for more details.

Source code in mindcv/models/sknet.py
@register_model\ndef skresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:\n\"\"\"Get 50 layers SKNet model.\n    Refer to the base class `models.SKNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"skresnet50\"]\n    sk_kwargs = dict(split_input=True)\n    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,\n                  sk_kwargs=sk_kwargs, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.sknet.skresnext50_32x4d","title":"mindcv.models.sknet.skresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 50-layer SKNeXt model with 32 groups of GPConv. Refer to the base class models.SKNet for more details.

Source code in mindcv/models/sknet.py
@register_model\ndef skresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:\n\"\"\"Get 50 layers SKNeXt model with 32 groups of GPConv.\n    Refer to the base class `models.SKNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"skresnext50_32x4d\"]\n    sk_kwargs = dict(rd_ratio=1 / 16, rd_divisor=32, split_input=False)\n    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,\n                  sk_kwargs=sk_kwargs, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#squeezenet","title":"squeezenet","text":""},{"location":"reference/models/#mindcv.models.squeezenet.SqueezeNet","title":"mindcv.models.squeezenet.SqueezeNet","text":"

Bases: nn.Cell

SqueezeNet model class, based on \"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size\" <https://arxiv.org/abs/1602.07360>_

Note: In contrast to the other models, SqueezeNet expects tensors with a size of N x 3 x 227 x 227, so ensure your images are sized accordingly.

PARAMETER DESCRIPTION version

version of the architecture, '1_0' or '1_1'. Default: '1_0'.

TYPE: str DEFAULT: '1_0'

num_classes

number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

drop_rate

dropout rate of the classifier. Default: 0.5.

TYPE: float DEFAULT: 0.5

in_channels

number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

Source code in mindcv/models/squeezenet.py
class SqueezeNet(nn.Cell):\nr\"\"\"SqueezeNet model class, based on\n    `\"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size\" <https://arxiv.org/abs/1602.07360>`_  # noqa: E501\n\n    .. note::\n        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of\n        N x 3 x 227 x 227, so ensure your images are sized accordingly.\n\n    Args:\n        version: version of the architecture, '1_0' or '1_1'. Default: '1_0'.\n        num_classes: number of classification classes. Default: 1000.\n        drop_rate: dropout rate of the classifier. Default: 0.5.\n        in_channels: number the channels of the input. Default: 3.\n    \"\"\"\n\n    def __init__(\n        self,\n        version: str = \"1_0\",\n        num_classes: int = 1000,\n        drop_rate: float = 0.5,\n        in_channels: int = 3,\n    ) -> None:\n        super().__init__()\n        if version == \"1_0\":\n            self.features = nn.SequentialCell([\n                nn.Conv2d(in_channels, 96, kernel_size=7, stride=2, pad_mode=\"valid\", has_bias=True),\n                nn.ReLU(),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(96, 16, 64, 64),\n                Fire(128, 16, 64, 64),\n                Fire(128, 32, 128, 128),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(256, 32, 128, 128),\n                Fire(256, 48, 192, 192),\n                Fire(384, 48, 192, 192),\n                Fire(384, 64, 256, 256),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(512, 64, 256, 256),\n            ])\n        elif version == \"1_1\":\n            self.features = nn.SequentialCell([\n                nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode=\"pad\", has_bias=True),\n                nn.ReLU(),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(64, 16, 64, 64),\n                Fire(128, 16, 64, 64),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(128, 32, 128, 128),\n                Fire(256, 32, 128, 128),\n                nn.MaxPool2d(kernel_size=3, stride=2),\n                Fire(256, 48, 192, 192),\n                Fire(384, 48, 192, 192),\n                Fire(384, 64, 256, 256),\n                Fire(512, 64, 256, 256),\n            ])\n        else:\n            raise ValueError(f\"Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected\")\n\n        self.final_conv = nn.Conv2d(512, num_classes, kernel_size=1, has_bias=True)\n        self.classifier = nn.SequentialCell([\n            Dropout(p=drop_rate),\n            self.final_conv,\n            nn.ReLU(),\n            GlobalAvgPooling()\n        ])\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                if cell is self.final_conv:\n                    cell.weight.set_data(init.initializer(init.Normal(), cell.weight.shape, cell.weight.dtype))\n                else:\n                    cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def 
forward_head(self, x: Tensor) -> Tensor:\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
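A minimal forward-pass sketch for the class above (hedged; imports assumed from the source listing). Per the note, inputs are expected as N x 3 x 227 x 227.

import numpy as np
import mindspore as ms
from mindcv.models.squeezenet import SqueezeNet

# version selects the '1_0' or '1_1' feature stack defined in __init__.
net = SqueezeNet(version="1_1", num_classes=1000, drop_rate=0.5, in_channels=3)

x = ms.Tensor(np.random.randn(1, 3, 227, 227).astype(np.float32))
logits = net(x)  # shape (1, 1000)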
"},{"location":"reference/models/#mindcv.models.squeezenet.squeezenet1_0","title":"mindcv.models.squeezenet.squeezenet1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get SqueezeNet model of version 1.0. Refer to the base class models.SqueezeNet for more details.

Source code in mindcv/models/squeezenet.py
@register_model\ndef squeezenet1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:\n\"\"\"Get SqueezeNet model of version 1.0.\n    Refer to the base class `models.SqueezeNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"squeezenet1_0\"]\n    model = SqueezeNet(version=\"1_0\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.squeezenet.squeezenet1_1","title":"mindcv.models.squeezenet.squeezenet1_1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get SqueezeNet model of version 1.1. Refer to the base class models.SqueezeNet for more details.

Source code in mindcv/models/squeezenet.py
@register_model\ndef squeezenet1_1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:\n\"\"\"Get SqueezeNet model of version 1.1.\n    Refer to the base class `models.SqueezeNet` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"squeezenet1_1\"]\n    model = SqueezeNet(version=\"1_1\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#swintransformer","title":"swintransformer","text":""},{"location":"reference/models/#mindcv.models.swintransformer.SwinTransformer","title":"mindcv.models.swintransformer.SwinTransformer","text":"

Bases: nn.Cell

SwinTransformer model class, based on \"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows\" <https://arxiv.org/pdf/2103.14030>_

PARAMETER DESCRIPTION image_size

Input image size. Default: 224

TYPE: int | tuple(int) DEFAULT: 224

patch_size

Patch size. Default: 4

TYPE: int | tuple(int) DEFAULT: 4

in_chans

Number of input image channels. Default: 3

TYPE: int DEFAULT: 3

num_classes

Number of classes for classification head. Default: 1000

TYPE: int DEFAULT: 1000

embed_dim

Patch embedding dimension. Default: 96

TYPE: int DEFAULT: 96

depths

Depth of each Swin Transformer layer.

TYPE: tuple(int) DEFAULT: None

num_heads

Number of attention heads in different layers.

TYPE: tuple(int) DEFAULT: None

window_size

Window size. Default: 7

TYPE: int DEFAULT: 7

mlp_ratio

Ratio of mlp hidden dim to embedding dim. Default: 4

TYPE: float DEFAULT: 4.0

qkv_bias

If True, add a learnable bias to query, key, value. Default: True

TYPE: bool DEFAULT: True

qk_scale

Override default qk scale of head_dim ** -0.5 if set. Default: None

TYPE: float DEFAULT: None

drop_rate

Dropout rate. Default: 0

TYPE: float DEFAULT: 0.0

attn_drop_rate

Attention dropout rate. Default: 0

TYPE: float DEFAULT: 0.0

drop_path_rate

Stochastic depth rate. Default: 0.1

TYPE: float DEFAULT: 0.1

norm_layer

Normalization layer. Default: nn.LayerNorm.

TYPE: nn.Cell DEFAULT: nn.LayerNorm

ape

If True, add absolute position embedding to the patch embedding. Default: False

TYPE: bool DEFAULT: False

patch_norm

If True, add normalization after patch embedding. Default: True

TYPE: bool DEFAULT: True

Source code in mindcv/models/swintransformer.py
class SwinTransformer(nn.Cell):\nr\"\"\"SwinTransformer model class, based on\n    `\"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows\" <https://arxiv.org/pdf/2103.14030>`_\n\n    Args:\n        image_size (int | tuple(int)): Input image size. Default 224\n        patch_size (int | tuple(int)): Patch size. Default: 4\n        in_chans (int): Number of input image channels. Default: 3\n        num_classes (int): Number of classes for classification head. Default: 1000\n        embed_dim (int): Patch embedding dimension. Default: 96\n        depths (tuple(int)): Depth of each Swin Transformer layer.\n        num_heads (tuple(int)): Number of attention heads in different layers.\n        window_size (int): Window size. Default: 7\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4\n        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True\n        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None\n        drop_rate (float): Dropout rate. Default: 0\n        attn_drop_rate (float): Attention dropout rate. Default: 0\n        drop_path_rate (float): Stochastic depth rate. Default: 0.1\n        norm_layer (nn.Cell): Normalization layer. Default: nn.LayerNorm.\n        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False\n        patch_norm (bool): If True, add normalization after patch embedding. Default: True\n    \"\"\"\n\n    def __init__(\n        self,\n        image_size: int = 224,\n        patch_size: int = 4,\n        in_chans: int = 3,\n        num_classes: int = 1000,\n        embed_dim: int = 96,\n        depths: Optional[List[int]] = None,\n        num_heads: Optional[List[int]] = None,\n        window_size: int = 7,\n        mlp_ratio: float = 4.0,\n        qkv_bias: bool = True,\n        qk_scale: Optional[int] = None,\n        drop_rate: float = 0.0,\n        attn_drop_rate: float = 0.0,\n        drop_path_rate: float = 0.1,\n        norm_layer: Optional[nn.Cell] = nn.LayerNorm,\n        ape: bool = False,\n        patch_norm: bool = True,\n    ) -> None:\n        super().__init__()\n\n        self.num_classes = num_classes\n        self.num_layers = len(depths)\n        self.embed_dim = embed_dim\n        self.ape = ape\n        self.patch_norm = patch_norm\n        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))\n        self.mlp_ratio = mlp_ratio\n\n        # split image into non-overlapping patches\n        self.patch_embed = PatchEmbed(\n            image_size=image_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,\n            norm_layer=norm_layer if self.patch_norm else None)\n        num_patches = self.patch_embed.num_patches\n        patches_resolution = self.patch_embed.patches_resolution\n        self.patches_resolution = patches_resolution\n\n        # absolute position embedding\n        if self.ape:\n            self.absolute_pos_embed = Parameter(Tensor(np.zeros(1, num_patches, embed_dim), dtype=mstype.float32))\n\n        self.pos_drop = Dropout(p=drop_rate)\n\n        # stochastic depth\n        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule\n\n        # build layers\n        self.layers = nn.CellList()\n        for i_layer in range(self.num_layers):\n            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),\n                               input_resolution=(patches_resolution[0] // (2 ** i_layer),\n                  
                               patches_resolution[1] // (2 ** i_layer)),\n                               depth=depths[i_layer],\n                               num_heads=num_heads[i_layer],\n                               window_size=window_size,\n                               mlp_ratio=self.mlp_ratio,\n                               qkv_bias=qkv_bias, qk_scale=qk_scale,\n                               drop=drop_rate, attn_drop=attn_drop_rate,\n                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],\n                               norm_layer=norm_layer,\n                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None)\n            self.layers.append(layer)\n\n        self.norm = norm_layer([self.num_features, ], epsilon=1e-5)\n        self.classifier = nn.Dense(in_channels=self.num_features,\n                                   out_channels=num_classes, has_bias=True) if num_classes > 0 else Identity()\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02),\n                                                      cell.weight.shape, cell.weight.dtype))\n                if isinstance(cell, nn.Dense) and cell.bias is not None:\n                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))\n\n    def no_weight_decay(self) -> None:\n        return {\"absolute_pos_embed\"}\n\n    def no_weight_decay_keywords(self) -> None:\n        return {\"relative_position_bias_table\"}\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.classifier(x)\n        return x\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.patch_embed(x)\n        if self.ape:\n            x = x + self.absolute_pos_embed\n        x = self.pos_drop(x)\n        for layer in self.layers:\n            x = layer(x)\n        x = self.norm(x)  # B L C\n        x = ops.mean(ops.transpose(x, (0, 2, 1)), 2)  # B C 1\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
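Because depths and num_heads default to None, they must be passed when instantiating the class directly; a hedged sketch using the Swin-Tiny configuration that the swin_tiny factory below also uses:

import numpy as np
import mindspore as ms
from mindspore import nn
from mindcv.models.swintransformer import SwinTransformer

# Swin-Tiny style configuration: 224x224 input, 4x4 patches, window size 7.
net = SwinTransformer(
    image_size=224, patch_size=4, in_chans=3, num_classes=1000,
    embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24],
    window_size=7, drop_path_rate=0.2, norm_layer=nn.LayerNorm,
)

x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)  # shape (1, 1000)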
"},{"location":"reference/models/#mindcv.models.swintransformer.swin_tiny","title":"mindcv.models.swintransformer.swin_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get SwinTransformer tiny model. Refer to the base class 'models.SwinTransformer' for more details.

Source code in mindcv/models/swintransformer.py
@register_model\ndef swin_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SwinTransformer:\n\"\"\"Get SwinTransformer tiny model.\n    Refer to the base class 'models.SwinTransformer' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"swin_tiny\"]\n    model = SwinTransformer(image_size=224, patch_size=4, in_chans=in_channels, num_classes=num_classes,\n                            embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7,\n                            mlp_ratio=4., qkv_bias=True, qk_scale=None,\n                            drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2,\n                            norm_layer=nn.LayerNorm, ape=False, patch_norm=True, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#swintransformerv2","title":"swintransformerv2","text":""},{"location":"reference/models/#mindcv.models.swintransformerv2.SwinTransformerV2","title":"mindcv.models.swintransformerv2.SwinTransformerV2","text":"

Bases: nn.Cell

SwinTransformerV2 model class, based on \"Swin Transformer V2: Scaling Up Capacity and Resolution\" <https://arxiv.org/abs/2111.09883>_

PARAMETER DESCRIPTION image_size

Input image size. Default: 256.

TYPE: int DEFAULT: 256

patch_size

Patch size. Default: 4.

TYPE: int DEFAULT: 4

in_channels

Number of channels of the input. Default: 3.

TYPE: int DEFAULT: 3

num_classes

Number of classification classes. Default: 1000.

TYPE: int DEFAULT: 1000

embed_dim

Patch embedding dimension. Default: 96.

TYPE: int DEFAULT: 96

depths

Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].

TYPE: List[int] DEFAULT: [2, 2, 6, 2]

num_heads

Number of attention heads in different layers. Default: [3, 6, 12, 24].

TYPE: List[int] DEFAULT: [3, 6, 12, 24]

window_size

Window size. Default: 7.

TYPE: int DEFAULT: 7

mlp_ratio

Ratio of mlp hidden dim to embedding dim. Default: 4.

TYPE: float DEFAULT: 4.0

qkv_bias

If True, add a bias for query, key, value. Default: True.

TYPE: bool DEFAULT: True

drop_rate

Drop probability for the Dropout layer. Default: 0.

TYPE: float DEFAULT: 0.0

attn_drop_rate

Attention drop probability for the Dropout layer. Default: 0.

TYPE: float DEFAULT: 0.0

drop_path_rate

Stochastic depth rate. Default: 0.1.

TYPE: float DEFAULT: 0.1

norm_layer

Normalization layer. Default: nn.LayerNorm.

TYPE: nn.Cell DEFAULT: nn.LayerNorm

patch_norm

If True, add normalization after patch embedding. Default: True.

TYPE: bool DEFAULT: True

pretrained_window_sizes

Pretrained window sizes of each layer. Default: [0, 0, 0, 0].

TYPE: List[int] DEFAULT: [0, 0, 0, 0]

Source code in mindcv/models/swintransformerv2.py
class SwinTransformerV2(nn.Cell):\nr\"\"\"SwinTransformerV2 model class, based on\n    `\"Swin Transformer V2: Scaling Up Capacity and Resolution\" <https://arxiv.org/abs/2111.09883>`_\n\n    Args:\n        image_size: Input image size. Default: 256.\n        patch_size: Patch size. Default: 4.\n        in_channels: Number the channels of the input. Default: 3.\n        num_classes: Number of classification classes. Default: 1000.\n        embed_dim: Patch embedding dimension. Default: 96.\n        depths: Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].\n        num_heads: Number of attention heads in different layers. Default: [3, 6, 12, 24].\n        window_size: Window size. Default: 7.\n        mlp_ratio: Ratio of mlp hidden dim to embedding dim. Default: 4.\n        qkv_bias: If True, add a bias for query, key, value. Default: True.\n        drop_rate: Drop probability for the Dropout layer. Default: 0.\n        attn_drop_rate: Attention drop probability for the Dropout layer. Default: 0.\n        drop_path_rate: Stochastic depth rate. Default: 0.1.\n        norm_layer: Normalization layer. Default: nn.LayerNorm.\n        patch_norm: If True, add normalization after patch embedding. Default: True.\n        pretrained_window_sizes: Pretrained window sizes of each layer. Default: [0, 0, 0, 0].\n    \"\"\"\n\n    def __init__(\n        self,\n        image_size: int = 256,\n        patch_size: int = 4,\n        in_channels: int = 3,\n        num_classes: int = 1000,\n        embed_dim: int = 96,\n        depths: List[int] = [2, 2, 6, 2],\n        num_heads: List[int] = [3, 6, 12, 24],\n        window_size: int = 7,\n        mlp_ratio: float = 4.0,\n        qkv_bias: bool = True,\n        drop_rate: float = 0.0,\n        attn_drop_rate: float = 0.0,\n        drop_path_rate: float = 0.1,\n        norm_layer: nn.Cell = nn.LayerNorm,\n        patch_norm: bool = True,\n        pretrained_window_sizes: List[int] = [0, 0, 0, 0],\n    ) -> None:\n        super().__init__()\n        self.num_classes = num_classes\n        self.num_layers = len(depths)\n        self.embed_dim = embed_dim\n        self.in_channels = in_channels\n        self.patch_size = patch_size\n        self.patch_norm = patch_norm\n        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))\n        self.mlp_ratio = mlp_ratio\n\n        # split image into non-overlapping patches\n        self.patch_embed = PatchEmbed(\n            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim,\n            norm_layer=norm_layer if self.patch_norm else None)\n        num_patches = self.patch_embed.num_patches\n        self.num_patches = num_patches\n        patches_resolution = self.patch_embed.patches_resolution\n        self.patches_resolution = patches_resolution\n\n        self.pos_drop = Dropout(p=drop_rate)\n\n        # stochastic depth\n        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule\n\n        # build layers\n        self.layers = nn.CellList()\n        self.final_seq = num_patches  # downsample seq_length\n        for i_layer in range(self.num_layers):\n            layer = BasicLayer(\n                dim=int(embed_dim * 2**i_layer),\n                input_resolution=(patches_resolution[0] // (2**i_layer),\n                                  patches_resolution[1] // (2**i_layer)),\n                depth=depths[i_layer],\n                num_heads=num_heads[i_layer],\n                window_size=window_size,\n          
      mlp_ratio=self.mlp_ratio,\n                qkv_bias=qkv_bias,\n                drop=drop_rate, attn_drop=attn_drop_rate,\n                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],\n                norm_layer=norm_layer,\n                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,\n                pretrained_window_size=pretrained_window_sizes[i_layer]\n            )\n            # downsample seq_length\n            if i_layer < self.num_layers - 1:\n                self.final_seq = self.final_seq // 4\n            self.layers.append(layer)\n        self.head = nn.Dense(self.num_features, self.num_classes)\n\n        self.norm = norm_layer([self.num_features, ], epsilon=1e-6)\n        self.avgpool = ops.ReduceMean(keep_dims=False)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.gamma.set_data(init.initializer(\"ones\", cell.gamma.shape, cell.gamma.dtype))\n                cell.beta.set_data(init.initializer(\"zeros\", cell.beta.shape, cell.beta.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)\n                )\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.patch_embed(x)\n        x = self.pos_drop(x)\n        for layer in self.layers:\n            x = layer(x)\n        x = self.norm(x)  # B L C\n        x = self.avgpool(ops.transpose(x, (0, 2, 1)), 2)  # B C 1\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.head(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
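A hedged construction sketch for the class above, mirroring the swinv2_tiny_window16 factory below (256x256 input, window size 16; import path assumed from the source listing):

import numpy as np
import mindspore as ms
from mindcv.models.swintransformerv2 import SwinTransformerV2

# Tiny configuration; pretrained_window_sizes keeps its [0, 0, 0, 0] default.
net = SwinTransformerV2(
    image_size=256, patch_size=4, in_channels=3, num_classes=1000,
    embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24],
    window_size=16,
)

x = ms.Tensor(np.random.randn(1, 3, 256, 256).astype(np.float32))
logits = net(x)  # shape (1, 1000)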
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_base_window16","title":"mindcv.models.swintransformerv2.swinv2_base_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_base_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_base_window16\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=16, embed_dim=128, depths=[2, 2, 18, 2],\n                              num_heads=[4, 8, 16, 32], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_base_window8","title":"mindcv.models.swintransformerv2.swinv2_base_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_base_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_base_window8\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=8, embed_dim=128, depths=[2, 2, 18, 2],\n                              num_heads=[4, 8, 16, 32], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_small_window16","title":"mindcv.models.swintransformerv2.swinv2_small_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_small_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_small_window16\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=16, embed_dim=96, depths=[2, 2, 18, 2],\n                              num_heads=[3, 6, 12, 24], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_small_window8","title":"mindcv.models.swintransformerv2.swinv2_small_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_small_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_small_window8\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=8, embed_dim=96, depths=[2, 2, 18, 2],\n                              num_heads=[3, 6, 12, 24], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_tiny_window16","title":"mindcv.models.swintransformerv2.swinv2_tiny_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_tiny_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_tiny_window16\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=16, embed_dim=96, depths=[2, 2, 6, 2],\n                              num_heads=[3, 6, 12, 24], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.swintransformerv2.swinv2_tiny_window8","title":"mindcv.models.swintransformerv2.swinv2_tiny_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"Source code in mindcv/models/swintransformerv2.py
@register_model\ndef swinv2_tiny_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n    default_cfg = default_cfgs[\"swinv2_tiny_window8\"]\n    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,\n                              window_size=8, embed_dim=96, depths=[2, 2, 6, 2],\n                              num_heads=[3, 6, 12, 24], **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
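A minimal usage sketch for the SwinV2 builders registered above, assuming mindcv and mindspore are installed; swinv2_tiny_window8 is used here, but any of the variants works the same way:

import numpy as np
import mindspore as ms
from mindcv.models.swintransformerv2 import swinv2_tiny_window8

# Build the tiny variant without pretrained weights and run a dummy batch.
# The default image_size of SwinTransformerV2 is 256.
net = swinv2_tiny_window8(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 256, 256]), ms.float32)
print(net(x).shape)  # expected: (1, 1000)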
"},{"location":"reference/models/#vgg","title":"vgg","text":""},{"location":"reference/models/#mindcv.models.vgg.VGG","title":"mindcv.models.vgg.VGG","text":"

Bases: nn.Cell

VGGNet model class, based on "Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>.

PARAMETERS:
  • model_name (str): name of the architecture; one of 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
  • batch_norm (bool): whether to use batch normalization. Default: False.
  • num_classes (int): number of classification classes. Default: 1000.
  • in_channels (int): number of channels of the input. Default: 3.
  • drop_rate (float): dropout rate of the classifier. Default: 0.5.

Source code in mindcv/models/vgg.py
class VGG(nn.Cell):\nr\"\"\"VGGNet model class, based on\n    `\"Very Deep Convolutional Networks for Large-Scale Image Recognition\" <https://arxiv.org/abs/1409.1556>`_\n\n    Args:\n        model_name: name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.\n        batch_norm: use batch normalization or not. Default: False.\n        num_classes: number of classification classes. Default: 1000.\n        in_channels: number the channels of the input. Default: 3.\n        drop_rate: dropout rate of the classifier. Default: 0.5.\n    \"\"\"\n\n    def __init__(\n        self,\n        model_name: str,\n        batch_norm: bool = False,\n        num_classes: int = 1000,\n        in_channels: int = 3,\n        drop_rate: float = 0.5,\n    ) -> None:\n        super().__init__()\n        cfg = cfgs[model_name]\n        self.features = _make_layers(cfg, batch_norm=batch_norm, in_channels=in_channels)\n        self.flatten = nn.Flatten()\n        self.classifier = nn.SequentialCell([\n            nn.Dense(512 * 7 * 7, 4096),\n            nn.ReLU(),\n            Dropout(p=drop_rate),\n            nn.Dense(4096, 4096),\n            nn.ReLU(),\n            Dropout(p=drop_rate),\n            nn.Dense(4096, num_classes),\n        ])\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n\"\"\"Initialize weights for cells.\"\"\"\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Conv2d):\n                cell.weight.set_data(\n                    init.initializer(init.HeNormal(math.sqrt(5), mode=\"fan_out\", nonlinearity=\"relu\"),\n                                     cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(\n                        init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.Dense):\n                cell.weight.set_data(\n                    init.initializer(init.Normal(0.01), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(init.initializer(\"zeros\", cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        x = self.flatten(x)\n        x = self.classifier(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.vgg.vgg11","title":"mindcv.models.vgg.vgg11(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get an 11-layer VGG model. Refer to the base class models.VGG for more details.

Source code in mindcv/models/vgg.py
@register_model\ndef vgg11(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:\n\"\"\"Get 11 layers VGG model.\n    Refer to the base class `models.VGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"vgg11\"]\n    model = VGG(model_name=\"vgg11\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.vgg.vgg13","title":"mindcv.models.vgg.vgg13(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 13-layer VGG model. Refer to the base class models.VGG for more details.

Source code in mindcv/models/vgg.py
@register_model\ndef vgg13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:\n\"\"\"Get 13 layers VGG model.\n    Refer to the base class `models.VGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"vgg13\"]\n    model = VGG(model_name=\"vgg13\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.vgg.vgg16","title":"mindcv.models.vgg.vgg16(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 16-layer VGG model. Refer to the base class models.VGG for more details.

Source code in mindcv/models/vgg.py
@register_model\ndef vgg16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:\n\"\"\"Get 16 layers VGG model.\n    Refer to the base class `models.VGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"vgg16\"]\n    model = VGG(model_name=\"vgg16\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.vgg.vgg19","title":"mindcv.models.vgg.vgg19(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get a 19-layer VGG model. Refer to the base class models.VGG for more details.

Source code in mindcv/models/vgg.py
@register_model\ndef vgg19(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:\n\"\"\"Get 19 layers VGG model.\n    Refer to the base class `models.VGG` for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"vgg19\"]\n    model = VGG(model_name=\"vgg19\", num_classes=num_classes, in_channels=in_channels, **kwargs)\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
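A minimal usage sketch for the VGG builders above (assuming mindcv and mindspore are installed). Note that the classifier head is Dense(512 * 7 * 7, 4096), so inputs are expected at the canonical 224 x 224 resolution:

import numpy as np
import mindspore as ms
from mindcv.models.vgg import vgg16

# 224 / 32 = 7 after the five pooling stages, matching the 512 * 7 * 7 classifier input.
net = vgg16(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
print(net(x).shape)  # expected: (1, 1000)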
"},{"location":"reference/models/#visformer","title":"visformer","text":""},{"location":"reference/models/#mindcv.models.visformer.Visformer","title":"mindcv.models.visformer.Visformer","text":"

Bases: nn.Cell

Visformer model class, based on "Visformer: The Vision-friendly Transformer" <https://arxiv.org/pdf/2104.12533.pdf>.

PARAMETERS:
  • img_size (int): input image size. Default: 224.
  • init_channels (int): number of channels produced by the stem. Default: 32.
  • num_classes (int): number of classification classes. Default: 1000.
  • embed_dim (int): embedding dimension in all heads. Default: 384.
  • depth (List[int]): model block depth of each stage. Default: None.
  • num_heads (List[int]): number of heads in each stage. Default: None.
  • mlp_ratio (float): ratio of hidden features in Mlp. Default: 4.0.
  • qkv_bias (bool): whether the qkv layers have a bias. Default: False.
  • qk_scale (float): override the default qk scale of head_dim ** -0.5 if set. Default: None.
  • drop_rate (float): dropout rate. Default: 0.0.
  • attn_drop_rate (float): dropout rate of the attention layers. Default: 0.0.
  • drop_path_rate (float): drop path rate. Default: 0.1.
  • attn_stage (str): a block has an attention layer if its character is '1', otherwise not. Default: '1111'.
  • pos_embed (bool): whether to use position embedding. Default: True.
  • spatial_conv (str): a block has a spatial convolution layer if its character is '1', otherwise not. Default: '1111'.
  • group (int): convolution group. Default: 8.
  • pool (bool): whether to use global pooling. Default: True.
  • conv_init (bool): whether to initialize convolution weights with HeNormal (truncated normal otherwise). Default: False.

Source code in mindcv/models/visformer.py
class Visformer(nn.Cell):\nr\"\"\"Visformer model class, based on\n    '\"Visformer: The Vision-friendly Transformer\"\n    <https://arxiv.org/pdf/2104.12533.pdf>'\n\n    Args:\n        image_size (int) : images input size. Default: 224.\n        number the channels of the input. Default: 32.\n        num_classes (int) : number of classification classes. Default: 1000.\n        embed_dim (int) : embedding dimension in all head. Default: 384.\n        depth (int) : model block depth. Default: None.\n        num_heads (int) : number of heads. Default: None.\n        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.\n        qkv_bias (bool) : have bias in qkv layers or not. Default: False.\n        qk_scale (float) : Override default qk scale of head_dim ** -0.5 if set.\n        drop_rate (float) : dropout rate. Default: 0.\n        attn_drop_rate (float) : attention layers dropout rate. Default: 0.\n        drop_path_rate (float) : drop path rate. Default: 0.1.\n        attn_stage (str) : block will have a attention layer if value = '1' else not. Default: '1111'.\n        pos_embed (bool) : position embedding. Default: True.\n        spatial_conv (str) : block will have a spatial convolution layer if value = '1' else not. Default: '1111'.\n        group (int) : convolution group. Default: 8.\n        pool (bool) : if true will use global_pooling else not. Default: True.\n        conv_init : if true will init convolution weights else not. Default: False.\n    \"\"\"\n\n    def __init__(\n        self,\n        img_size: int = 224,\n        init_channels: int = 32,\n        num_classes: int = 1000,\n        embed_dim: int = 384,\n        depth: List[int] = None,\n        num_heads: List[int] = None,\n        mlp_ratio: float = 4.0,\n        qkv_bias: bool = False,\n        qk_scale: float = None,\n        drop_rate: float = 0.0,\n        attn_drop_rate: float = 0.0,\n        drop_path_rate: float = 0.1,\n        attn_stage: str = \"1111\",\n        pos_embed: bool = True,\n        spatial_conv: str = \"1111\",\n        group: int = 8,\n        pool: bool = True,\n        conv_init: bool = False,\n    ) -> None:\n        super(Visformer, self).__init__()\n        self.num_classes = num_classes\n        self.num_features = self.embed_dim = embed_dim\n        self.init_channels = init_channels\n        self.img_size = img_size\n        self.pool = pool\n        self.conv_init = conv_init\n        self.depth = depth\n        assert (isinstance(depth, list) or isinstance(depth, tuple)) and len(depth) == 4\n        if not (isinstance(num_heads, list) or isinstance(num_heads, tuple)):\n            num_heads = [num_heads] * 4\n\n        self.pos_embed = pos_embed\n        dpr = np.linspace(0, drop_path_rate, sum(depth)).tolist()\n\n        self.stem = nn.SequentialCell([\n            nn.Conv2d(3, self.init_channels, 7, 2, pad_mode=\"pad\", padding=3),\n            nn.BatchNorm2d(self.init_channels),\n            nn.ReLU()\n        ])\n        img_size //= 2\n\n        self.pos_drop = Dropout(p=drop_rate)\n        # stage0\n        if depth[0]:\n            self.patch_embed0 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=self.init_channels,\n                                           embed_dim=embed_dim // 4)\n            img_size //= 2\n            if self.pos_embed:\n                self.pos_embed0 = mindspore.Parameter(\n                    ops.zeros((1, embed_dim // 4, img_size, img_size), mindspore.float32))\n            self.stage0 = nn.CellList([\n                
Block(dim=embed_dim // 4, num_heads=num_heads[0], head_dim_ratio=0.25, mlp_ratio=mlp_ratio,\n                      qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                      group=group, attn_disabled=(attn_stage[0] == \"0\"), spatial_conv=(spatial_conv[0] == \"1\"))\n                for i in range(depth[0])\n            ])\n\n        # stage1\n        if depth[0]:\n            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 4,\n                                           embed_dim=embed_dim // 2)\n            img_size //= 2\n        else:\n            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=self.init_channels,\n                                           embed_dim=embed_dim // 2)\n            img_size //= 4\n\n        if self.pos_embed:\n            self.pos_embed1 = mindspore.Parameter(ops.zeros((1, embed_dim // 2, img_size, img_size), mindspore.float32))\n\n        self.stage1 = nn.CellList([\n            Block(\n                dim=embed_dim // 2, num_heads=num_heads[1], head_dim_ratio=0.5, mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                group=group, attn_disabled=(attn_stage[1] == \"0\"), spatial_conv=(spatial_conv[1] == \"1\")\n            )\n            for i in range(sum(depth[:1]), sum(depth[:2]))\n        ])\n\n        # stage2\n        self.patch_embed2 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 2, embed_dim=embed_dim)\n        img_size //= 2\n        if self.pos_embed:\n            self.pos_embed2 = mindspore.Parameter(ops.zeros((1, embed_dim, img_size, img_size), mindspore.float32))\n        self.stage2 = nn.CellList([\n            Block(\n                dim=embed_dim, num_heads=num_heads[2], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                group=group, attn_disabled=(attn_stage[2] == \"0\"), spatial_conv=(spatial_conv[2] == \"1\")\n            )\n            for i in range(sum(depth[:2]), sum(depth[:3]))\n        ])\n\n        # stage3\n        self.patch_embed3 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim, embed_dim=embed_dim * 2)\n        img_size //= 2\n        if self.pos_embed:\n            self.pos_embed3 = mindspore.Parameter(ops.zeros((1, embed_dim * 2, img_size, img_size), mindspore.float32))\n        self.stage3 = nn.CellList([\n            Block(\n                dim=embed_dim * 2, num_heads=num_heads[3], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                group=group, attn_disabled=(attn_stage[3] == \"0\"), spatial_conv=(spatial_conv[3] == \"1\")\n            )\n            for i in range(sum(depth[:3]), sum(depth[:4]))\n        ])\n\n        # head\n        if self.pool:\n            self.global_pooling = GlobalAvgPooling()\n\n        self.norm = nn.BatchNorm2d(embed_dim * 2)\n        self.head = nn.Dense(embed_dim * 2, num_classes)\n\n        # weight init\n        if self.pos_embed:\n            if depth[0]:\n                self.pos_embed0.set_data(initializer(TruncatedNormal(0.02),\n                                                     self.pos_embed0.shape, self.pos_embed0.dtype))\n            
self.pos_embed1.set_data(initializer(TruncatedNormal(0.02),\n                                                 self.pos_embed1.shape, self.pos_embed1.dtype))\n            self.pos_embed2.set_data(initializer(TruncatedNormal(0.02),\n                                                 self.pos_embed2.shape, self.pos_embed2.dtype))\n            self.pos_embed3.set_data(initializer(TruncatedNormal(0.02),\n                                                 self.pos_embed3.shape, self.pos_embed3.dtype))\n        self._initialize_weights()\n\n    def _initialize_weights(self) -> None:\n        for _, cell in self.cells_and_names():\n            if isinstance(cell, nn.Dense):\n                cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))\n            elif isinstance(cell, nn.LayerNorm):\n                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))\n                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))\n            elif isinstance(cell, nn.BatchNorm2d):\n                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))\n                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))\n            elif isinstance(cell, nn.Conv2d):\n                if self.conv_init:\n                    cell.weight.set_data(initializer(HeNormal(mode=\"fan_out\", nonlinearity=\"relu\"), cell.weight.shape,\n                                                     cell.weight.dtype))\n                else:\n                    cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))\n                if cell.bias is not None:\n                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.stem(x)\n\n        # stage 0\n        if self.depth[0]:\n            x = self.patch_embed0(x)\n            if self.pos_embed:\n                x = x + self.pos_embed0\n                x = self.pos_drop(x)\n            for b in self.stage0:\n                x = b(x)\n\n        # stage 1\n        x = self.patch_embed1(x)\n        if self.pos_embed:\n            x = x + self.pos_embed1\n            x = self.pos_drop(x)\n        for b in self.stage1:\n            x = b(x)\n\n        # stage 2\n        x = self.patch_embed2(x)\n        if self.pos_embed:\n            x = x + self.pos_embed2\n            x = self.pos_drop(x)\n        for b in self.stage2:\n            x = b(x)\n\n        # stage 3\n        x = self.patch_embed3(x)\n        if self.pos_embed:\n            x = x + self.pos_embed3\n            x = self.pos_drop(x)\n        for b in self.stage3:\n            x = b(x)\n        x = self.norm(x)\n        return x\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        # head\n        if self.pool:\n            x = self.global_pooling(x)\n        else:\n            x = x[:, :, 0, 0]\n        x = self.head(x.view(x.shape[0], -1))\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        x = self.forward_head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.visformer.visformer_small","title":"mindcv.models.visformer.visformer_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the Visformer small model. Refer to the base class models.Visformer for more details.

Source code in mindcv/models/visformer.py
@register_model\ndef visformer_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"Get visformer small model.\n    Refer to the base class 'models.visformer' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"visformer_small\"]\n    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=384,\n                      depth=[0, 7, 4, 4], num_heads=[6, 6, 6, 6], mlp_ratio=4., group=8,\n                      attn_stage=\"0011\", spatial_conv=\"1100\", conv_init=True, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.visformer.visformer_small_v2","title":"mindcv.models.visformer.visformer_small_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the Visformer small v2 model. Refer to the base class models.Visformer for more details.

Source code in mindcv/models/visformer.py
@register_model\ndef visformer_small_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"Get visformer small2 model.\n    Refer to the base class 'models.visformer' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"visformer_small_v2\"]\n    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=256,\n                      depth=[1, 10, 14, 3], num_heads=[2, 4, 8, 16], mlp_ratio=4., qk_scale=-0.5,\n                      group=8, attn_stage=\"0011\", spatial_conv=\"1100\", conv_init=True, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.visformer.visformer_tiny","title":"mindcv.models.visformer.visformer_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the Visformer tiny model. Refer to the base class models.Visformer for more details.

Source code in mindcv/models/visformer.py
@register_model\ndef visformer_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"Get visformer tiny model.\n    Refer to the base class 'models.visformer' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"visformer_tiny\"]\n    model = Visformer(img_size=224, init_channels=16, num_classes=num_classes, embed_dim=192,\n                      depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3], mlp_ratio=4., group=8,\n                      attn_stage=\"0011\", spatial_conv=\"1100\", drop_path_rate=0.03, conv_init=True, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
"},{"location":"reference/models/#mindcv.models.visformer.visformer_tiny_v2","title":"mindcv.models.visformer.visformer_tiny_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the Visformer tiny v2 model. Refer to the base class models.Visformer for more details.

Source code in mindcv/models/visformer.py
@register_model\ndef visformer_tiny_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n\"\"\"Get visformer tiny2 model.\n    Refer to the base class 'models.visformer' for more details.\n    \"\"\"\n    default_cfg = default_cfgs[\"visformer_tiny_v2\"]\n    model = Visformer(img_size=224, init_channels=24, num_classes=num_classes, embed_dim=192,\n                      depth=[1, 4, 6, 3], num_heads=[1, 3, 6, 12], mlp_ratio=4., qk_scale=-0.5, group=8,\n                      attn_stage=\"0011\", spatial_conv=\"1100\", drop_path_rate=0.03, conv_init=True, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
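A minimal usage sketch for the Visformer builders above (assuming mindcv and mindspore are installed). Visformer itself requires depth and num_heads to be set; the registered builders fill them in, e.g. depth=[0, 7, 4, 4] for the tiny variant:

import numpy as np
import mindspore as ms
from mindcv.models.visformer import visformer_tiny

# Build the tiny variant and run a dummy 224 x 224 batch through it.
net = visformer_tiny(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
print(net(x).shape)  # expected: (1, 1000)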
"},{"location":"reference/models/#vit","title":"vit","text":""},{"location":"reference/models/#mindcv.models.vit.ViT","title":"mindcv.models.vit.ViT","text":"

Bases: nn.Cell

Vision Transformer architecture implementation.

PARAMETERS:
  • image_size (int): input image size. Default: 224.
  • input_channels (int): number of input channels. Default: 3.
  • patch_size (int): patch size of the image. Default: 16.
  • embed_dim (int): dimension of the embedding. Default: 768.
  • num_layers (int): depth of the transformer. Default: 12.
  • num_heads (int): number of attention heads. Default: 12.
  • mlp_dim (int): dimension of the MLP hidden layer. Default: 3072.
  • keep_prob (float): keep rate, greater than 0 and less than or equal to 1. Default: 1.0.
  • attention_keep_prob (float): keep rate for the attention layer. Default: 1.0.
  • drop_path_keep_prob (float): keep rate for drop path. Default: 1.0.
  • activation (nn.Cell): activation function stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.
  • norm (nn.Cell, optional): norm layer stacked on top of the convolution layer. Default: nn.LayerNorm.
  • pool (str): the pooling method. Default: 'cls'.

Inputs:
  • x (Tensor) - Tensor of shape (N, C_in, H_in, W_in).

Outputs:
  Tensor of shape (N, 768).

RAISES: ValueError - if split is not 'train', 'test' or 'infer'.

Supported Platforms

GPU

Examples:

>>> net = ViT()\n>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)\n>>> output = net(x)\n>>> print(output.shape)\n(1, 768)\n

About ViT:

Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train.

Citation:

.. code-block::

@article{2020An,\ntitle={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},\nauthor={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},\nyear={2020},\n}\n
Source code in mindcv/models/vit.py
class ViT(nn.Cell):\n\"\"\"\n    Vision Transformer architecture implementation.\n\n    Args:\n        image_size (int): Input image size. Default: 224.\n        input_channels (int): The number of input channel. Default: 3.\n        patch_size (int): Patch size of image. Default: 16.\n        embed_dim (int): The dimension of embedding. Default: 768.\n        num_layers (int): The depth of transformer. Default: 12.\n        num_heads (int): The number of attention heads. Default: 12.\n        mlp_dim (int): The dimension of MLP hidden layer. Default: 3072.\n        keep_prob (float): The keep rate, greater than 0 and less equal than 1. Default: 1.0.\n        attention_keep_prob (float): The keep rate for attention layer. Default: 1.0.\n        drop_path_keep_prob (float): The keep rate for drop path. Default: 1.0.\n        activation (nn.Cell): Activation function which will be stacked on top of the\n            normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.\n        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution\n            layer. Default: nn.LayerNorm.\n        pool (str): The method of pooling. Default: 'cls'.\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Outputs:\n        Tensor of shape :math:`(N, 768)`\n\n    Raises:\n        ValueError: If `split` is not 'train', 'test' or 'infer'.\n\n    Supported Platforms:\n        ``GPU``\n\n    Examples:\n        >>> net = ViT()\n        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)\n        >>> output = net(x)\n        >>> print(output.shape)\n        (1, 768)\n\n    About ViT:\n\n    Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image\n    patches can perform very well on image classification tasks. When pre-trained on large amounts\n    of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet,\n    CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art\n    convolutional networks while requiring substantially fewer computational resources to train.\n\n    Citation:\n\n    .. code-block::\n\n        @article{2020An,\n        title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},\n        author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. 
and Houlsby, N.},\n        year={2020},\n        }\n    \"\"\"\n\n    def __init__(\n        self,\n        image_size: int = 224,\n        input_channels: int = 3,\n        patch_size: int = 16,\n        embed_dim: int = 768,\n        num_layers: int = 12,\n        num_heads: int = 12,\n        mlp_dim: int = 3072,\n        keep_prob: float = 1.0,\n        attention_keep_prob: float = 1.0,\n        drop_path_keep_prob: float = 1.0,\n        activation: nn.Cell = nn.GELU,\n        norm: Optional[nn.Cell] = nn.LayerNorm,\n        pool: str = \"cls\",\n    ) -> None:\n        super().__init__()\n\n        self.patch_embedding = PatchEmbedding(image_size=image_size,\n                                              patch_size=patch_size,\n                                              embed_dim=embed_dim,\n                                              input_channels=input_channels)\n        num_patches = self.patch_embedding.num_patches\n\n        if pool == \"cls\":\n            self.cls_token = init(init_type=Normal(sigma=1.0),\n                                  shape=(1, 1, embed_dim),\n                                  dtype=ms.float32,\n                                  name=\"cls\",\n                                  requires_grad=True)\n            self.pos_embedding = init(init_type=Normal(sigma=1.0),\n                                      shape=(1, num_patches + 1, embed_dim),\n                                      dtype=ms.float32,\n                                      name=\"pos_embedding\",\n                                      requires_grad=True)\n            self.concat = ops.Concat(axis=1)\n        else:\n            self.pos_embedding = init(init_type=Normal(sigma=1.0),\n                                      shape=(1, num_patches, embed_dim),\n                                      dtype=ms.float32,\n                                      name=\"pos_embedding\",\n                                      requires_grad=True)\n            self.mean = ops.ReduceMean(keep_dims=False)\n\n        self.pool = pool\n        self.pos_dropout = Dropout(p=1.0-keep_prob)\n        self.norm = norm((embed_dim,))\n        self.tile = ops.Tile()\n        self.transformer = TransformerEncoder(\n            dim=embed_dim,\n            num_layers=num_layers,\n            num_heads=num_heads,\n            mlp_dim=mlp_dim,\n            keep_prob=keep_prob,\n            attention_keep_prob=attention_keep_prob,\n            drop_path_keep_prob=drop_path_keep_prob,\n            activation=activation,\n            norm=norm,\n        )\n\n    def construct(self, x):\n\"\"\"ViT construct.\"\"\"\n        x = self.patch_embedding(x)\n\n        if self.pool == \"cls\":\n            cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))\n            x = self.concat((cls_tokens, x))\n            x += self.pos_embedding\n        else:\n            x += self.pos_embedding\n        x = self.pos_dropout(x)\n        x = self.transformer(x)\n        x = self.norm(x)\n\n        if self.pool == \"cls\":\n            x = x[:, 0]\n        else:\n            x = self.mean(x, (1, ))  # (1,) or (1,2)\n        return x\n
"},{"location":"reference/models/#mindcv.models.vit.ViT.construct","title":"mindcv.models.vit.ViT.construct(x)","text":"

ViT construct.

Source code in mindcv/models/vit.py
def construct(self, x):\n\"\"\"ViT construct.\"\"\"\n    x = self.patch_embedding(x)\n\n    if self.pool == \"cls\":\n        cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))\n        x = self.concat((cls_tokens, x))\n        x += self.pos_embedding\n    else:\n        x += self.pos_embedding\n    x = self.pos_dropout(x)\n    x = self.transformer(x)\n    x = self.norm(x)\n\n    if self.pool == \"cls\":\n        x = x[:, 0]\n    else:\n        x = self.mean(x, (1, ))  # (1,) or (1,2)\n    return x\n
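A small sketch of the two pooling modes handled in construct above (illustrative only, assuming ViT is importable from mindcv.models.vit): with pool='cls' the class token is returned, with any other value the tokens are mean-pooled; both yield (N, embed_dim) features rather than logits.

import numpy as np
import mindspore as ms
from mindcv.models.vit import ViT

x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
# Class-token pooling (default) vs. mean pooling over all patch tokens.
print(ViT(pool="cls")(x).shape)   # expected: (1, 768)
print(ViT(pool="mean")(x).shape)  # expected: (1, 768)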
"},{"location":"reference/models/#mindcv.models.vit.vit_b_16_224","title":"mindcv.models.vit.vit_b_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

Constructs a vit_b_16 architecture from "An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>.

PARAMETERS:
  • pretrained (bool): whether to download and load the pre-trained model. Default: False.
  • num_classes (int): number of classification classes. Default: 1000.
  • in_channels (int): number of input channels. Default: 3.
  • image_size (int): input image size. Default: 224 for ImageNet.
  • has_logits (bool): whether the model has a logits (representation) layer. Default: False.
  • drop_rate (float): dropout rate. Default: 0.0.
  • drop_path_rate (float): stochastic depth rate. Default: 0.0.

RETURNS: ViT network, a mindspore.nn.Cell.

Inputs:
  • x (Tensor) - Tensor of shape (N, C_in, H_in, W_in).

Examples:

>>> net = vit_b_16_224()\n>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)\n>>> output = net(x)\n>>> print(output.shape)\n(1, 1000)\n
Outputs:
  Tensor of shape (N, CLASSES_out).

Supported Platforms

GPU

Source code in mindcv/models/vit.py
@register_model\ndef vit_b_16_224(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 224,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"\n    Constructs a vit_b_16 architecture from\n    `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.\n\n    Args:\n        pretrained (bool): Whether to download and load the pre-trained model. Default: False.\n        num_classes (int): The number of classification. Default: 1000.\n        in_channels (int): The number of input channels. Default: 3.\n        image_size (int): The input image size. Default: 224 for ImageNet.\n        has_logits (bool): Whether has logits or not. Default: False.\n        drop_rate (float): The drop out rate. Default: 0.0.s\n        drop_path_rate (float): The stochastic depth rate. Default: 0.0.\n\n    Returns:\n        ViT network, MindSpore.nn.Cell\n\n    Inputs:\n        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.\n\n    Examples:\n        >>> net = vit_b_16_224()\n        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)\n        >>> output = net(x)\n        >>> print(output.shape)\n        (1, 1000)\n\n    Outputs:\n        Tensor of shape :math:`(N, CLASSES_{out})`\n\n    Supported Platforms:\n        ``GPU``\n    \"\"\"\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 16\n    config.embed_dim = 768\n    config.mlp_dim = 3072\n    config.num_heads = 12\n    config.num_layers = 12\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.pretrained = pretrained\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.representation_size = 768 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_b_16_224\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_b_16_384","title":"mindcv.models.vit.vit_b_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_b_16_384(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 384,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 16\n    config.embed_dim = 768\n    config.mlp_dim = 3072\n    config.num_heads = 12\n    config.num_layers = 12\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.pretrained = pretrained\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.representation_size = 768 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_b_16_384\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_b_32_224","title":"mindcv.models.vit.vit_b_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_b_32_224(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 224,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 32\n    config.embed_dim = 768\n    config.mlp_dim = 3072\n    config.num_heads = 12\n    config.num_layers = 12\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.pretrained = pretrained\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.representation_size = 768 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_b_32_224\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_b_32_384","title":"mindcv.models.vit.vit_b_32_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_b_32_384(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 384,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention_dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 32\n    config.embed_dim = 768\n    config.mlp_dim = 3072\n    config.num_heads = 12\n    config.num_layers = 12\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention_dropout\n    config.drop_path_rate = drop_path_rate\n    config.pretrained = pretrained\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.representation_size = 768 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_b_32_384\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_l_16_224","title":"mindcv.models.vit.vit_l_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_l_16_224(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 224,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 16\n    config.embed_dim = 1024\n    config.mlp_dim = 4096\n    config.num_heads = 16\n    config.num_layers = 24\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.pretrained = pretrained\n    config.representation_size = 1024 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_l_16_224\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_l_16_384","title":"mindcv.models.vit.vit_l_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_l_16_384(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 384,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 16\n    config.embed_dim = 1024\n    config.mlp_dim = 4096\n    config.num_heads = 16\n    config.num_layers = 24\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.pretrained = pretrained\n    config.representation_size = 1024 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_l_16_384\"]\n\n    return vit(**config)\n
"},{"location":"reference/models/#mindcv.models.vit.vit_l_32_224","title":"mindcv.models.vit.vit_l_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)","text":"

construct and return a ViT network

Source code in mindcv/models/vit.py
@register_model\ndef vit_l_32_224(\n    pretrained: bool = False,\n    num_classes: int = 1000,\n    in_channels: int = 3,\n    image_size: int = 224,\n    has_logits: bool = False,\n    drop_rate: float = 0.0,\n    # attention-dropout: float = 0.0,\n    drop_path_rate: float = 0.0,\n) -> ViT:\n\"\"\"construct and return a ViT network\"\"\"\n    config = ConfigDict()\n    config.image_size = image_size\n    config.num_classes = num_classes\n    config.patch_size = 32\n    config.embed_dim = 1024\n    config.mlp_dim = 4096\n    config.num_heads = 16\n    config.num_layers = 24\n    config.dropout = drop_rate\n    config.attention_dropout = drop_rate  # attention-dropout\n    config.drop_path_rate = drop_path_rate\n    config.pretrained = pretrained\n    config.input_channels = in_channels\n    config.pool = \"cls\"\n    config.representation_size = 1024 if has_logits else None\n\n    config.url_cfg = default_cfgs[\"vit_l_32_224\"]\n\n    return vit(**config)\n
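All of the vit_* entry points above are registered model builders; a minimal usage sketch (assuming mindcv and mindspore are installed), keeping in mind that the *_224 variants expect 224 x 224 inputs and the *_384 variants expect 384 x 384:

import numpy as np
import mindspore as ms
from mindcv.models.vit import vit_l_16_224

# Build ViT-L/16 at 224 resolution and run a dummy batch through the classifier.
net = vit_l_16_224(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
print(net(x).shape)  # expected: (1, 1000)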
"},{"location":"reference/models/#volo","title":"volo","text":""},{"location":"reference/models/#mindcv.models.volo.VOLO","title":"mindcv.models.volo.VOLO","text":"

Bases: nn.Cell

Vision Outlooker, the main class of the model.
  • layers: [x, x, x, x], four blocks in two stages; the first block is an outlooker, the other three are transformers. We set four blocks, which are easily applied to downstream tasks.
  • img_size, in_channels, num_classes: self-explanatory.
  • patch_size: patch size used in outlook attention.
  • stem_hidden_dim: hidden dim of the patch embedding; 64 for d1-d4, 128 for d5.
  • embed_dims, num_heads: embedding dim and number of heads in each block.
  • downsamples: flags to apply downsampling or not.
  • outlook_attention: flags to apply outlook attention or not.
  • mlp_ratios, qkv_bias, qk_scale, drop_rate: easy to understand.
  • attn_drop_rate, drop_path_rate, norm_layer: easy to understand.
  • post_layers: post layers such as two class-attention layers ([ca, ca]); if set, return_mean=False.
  • return_mean: use the mean of all feature tokens for classification; if yes, there is no class token.
  • return_dense: use token labeling; details: https://github.com/zihangJiang/TokenLabeling
  • mix_token: mix tokens as in token labeling; details: https://github.com/zihangJiang/TokenLabeling
  • pooling_scale: pooling_scale=2 means we downsample 2x.
  • out_kernel, out_stride, out_padding: kernel size, stride, and padding for outlook attention.

Source code in mindcv/models/volo.py
class VOLO(nn.Cell):\n\"\"\"\n    Vision Outlooker, the main class of our model\n    --layers: [x,x,x,x], four blocks in two stages, the first block is outlooker, the\n              other three are transformer, we set four blocks, which are easily\n              applied to downstream tasks\n    --img_size, --in_channels, --num_classes: these three are very easy to understand\n    --patch_size: patch_size in outlook attention\n    --stem_hidden_dim: hidden dim of patch embedding, d1-d4 is 64, d5 is 128\n    --embed_dims, --num_heads: embedding dim, number of heads in each block\n    --downsamples: flags to apply downsampling or not\n    --outlook_attention: flags to apply outlook attention or not\n    --mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to undertand\n    --attn_drop_rate, --drop_path_rate, --norm_layer: easy to undertand\n    --post_layers: post layers like two class attention layers using [ca, ca],\n                  if yes, return_mean=False\n    --return_mean: use mean of all feature tokens for classification, if yes, no class token\n    --return_dense: use token labeling, details are here:\n                    https://github.com/zihangJiang/TokenLabeling\n    --mix_token: mixing tokens as token labeling, details are here:\n                    https://github.com/zihangJiang/TokenLabeling\n    --pooling_scale: pooling_scale=2 means we downsample 2x\n    --out_kernel, --out_stride, --out_padding: kerner size,\n                                               stride, and padding for outlook attention\n    \"\"\"\n    def __init__(\n        self,\n        layers,\n        img_size=224,\n        in_channels=3,\n        num_classes=1000,\n        patch_size=8,\n        stem_hidden_dim=64,\n        embed_dims=None,\n        num_heads=None,\n        downsamples=None,\n        outlook_attention=None,\n        mlp_ratios=None,\n        qkv_bias=False,\n        qk_scale=None,\n        drop_rate=0.0,\n        attn_drop_rate=0.0,\n        drop_path_rate=0.0,\n        norm_layer=nn.LayerNorm,\n        post_layers=None,\n        return_mean=False,\n        return_dense=True,\n        mix_token=True,\n        pooling_scale=2,\n        out_kernel=3,\n        out_stride=2,\n        out_padding=1,\n    ) -> None:\n\n        super().__init__()\n        self.num_classes = num_classes\n        self.patch_embed = PatchEmbed(stem_conv=True, stem_stride=2, patch_size=patch_size,\n                                      in_channels=in_channels, hidden_dim=stem_hidden_dim,\n                                      embed_dim=embed_dims[0])\n        # inital positional encoding, we add positional encoding after outlooker blocks\n        self.pos_embed = Parameter(\n            ops.zeros((1, img_size // patch_size // pooling_scale,\n                      img_size // patch_size // pooling_scale,\n                      embed_dims[-1]), mstype.float32))\n\n        self.pos_drop = Dropout(p=drop_rate)\n\n        # set the main block in network\n        network = []\n        for i in range(len(layers)):\n            if outlook_attention[i]:\n                # stage 1\n                stage = outlooker_blocks(Outlooker, i, embed_dims[i], layers,\n                                         downsample=downsamples[i], num_heads=num_heads[i],\n                                         kernel_size=out_kernel, stride=out_stride,\n                                         padding=out_padding, mlp_ratio=mlp_ratios[i],\n                                         qkv_bias=qkv_bias, qk_scale=qk_scale,\n                         
                attn_drop=attn_drop_rate, norm_layer=norm_layer)\n                network.append(stage)\n            else:\n                # stage 2\n                stage = transformer_blocks(Transformer, i, embed_dims[i], layers,\n                                           num_heads[i], mlp_ratio=mlp_ratios[i],\n                                           qkv_bias=qkv_bias, qk_scale=qk_scale,\n                                           drop_path_rate=drop_path_rate,\n                                           attn_drop=attn_drop_rate,\n                                           norm_layer=norm_layer)\n                network.append(stage)\n\n            if downsamples[i]:\n                # downsampling between two stages\n                network.append(Downsample(embed_dims[i], embed_dims[i + 1], 2))\n\n        self.network = nn.CellList(network)\n\n        # set post block, for example, class attention layers\n        self.post_network = None\n        if post_layers is not None:\n            self.post_network = nn.CellList([\n                get_block(post_layers[i],\n                          dim=embed_dims[-1],\n                          num_heads=num_heads[-1],\n                          mlp_ratio=mlp_ratios[-1],\n                          qkv_bias=qkv_bias,\n                          qk_scale=qk_scale,\n                          attn_drop=attn_drop_rate,\n                          drop_path=0.0,\n                          norm_layer=norm_layer)\n                for i in range(len(post_layers))\n            ])\n            self.cls_token = Parameter(ops.zeros((1, 1, embed_dims[-1]), mstype.float32))\n            self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.cls_token.data.shape))\n\n        # set output type\n        self.return_mean = return_mean  # if yes, return mean, not use class token\n        self.return_dense = return_dense  # if yes, return class token and all feature tokens\n        if return_dense:\n            assert not return_mean, \"cannot return both mean and dense\"\n        self.mix_token = mix_token\n        self.pooling_scale = pooling_scale\n        if mix_token:  # enable token mixing, see token labeling for details.\n            self.beta = 1.0\n            assert return_dense, \"return all tokens if mix_token is enabled\"\n        if return_dense:\n            self.aux_head = nn.Dense(\n                embed_dims[-1],\n                num_classes) if num_classes > 0 else Identity()\n        self.norm = norm_layer([embed_dims[-1]])\n\n        # Classifier head\n        self.head = nn.Dense(\n            embed_dims[-1], num_classes) if num_classes > 0 else Identity()\n\n        self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.pos_embed.data.shape))\n        self._init_weights()\n\n    def _init_weights(self) -> None:\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Dense):\n                m.weight.set_data(init.initializer(init.TruncatedNormal(sigma=.02), m.weight.data.shape))\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n            elif isinstance(m, nn.LayerNorm):\n                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))\n                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))\n\n    def forward_embeddings(self, x: Tensor) -> Tensor:\n        # patch embedding\n        x = self.patch_embed(x)\n        # B,C,H,W-> B,H,W,C\n        x 
= ops.transpose(x, (0, 2, 3, 1))\n        return x\n\n    def forward_tokens(self, x: Tensor) -> Tensor:\n        for idx, block in enumerate(self.network):\n            if idx == 2:  # add positional encoding after outlooker blocks\n                x = x + self.pos_embed\n                x = self.pos_drop(x)\n            x = block(x)\n\n        B, H, W, C = x.shape\n        x = ops.reshape(x, (B, -1, C))\n        return x\n\n    def forward_cls(self, x: Tensor) -> Tensor:\n        # B, N, C = x.shape\n        cls_tokens = ops.broadcast_to(self.cls_token, (x.shape[0], -1, -1))\n        x = ops.Cast()(x, cls_tokens.dtype)\n        x = ops.concat([cls_tokens, x], 1)\n        for block in self.post_network:\n            x = block(x)\n        return x\n\n    def construct(self, x: Tensor) -> Tensor:\n        # step1: patch embedding\n        x = self.forward_embeddings(x)\n\n        # step2: tokens learning in the two stages\n        x = self.forward_tokens(x)\n\n        # step3: post network, apply class attention or not\n        if self.post_network is not None:\n            x = self.forward_cls(x)\n        x = self.norm(x)\n\n        if self.return_mean:  # if no class token, return mean\n            return self.head(ops.mean(x, 1))\n\n        x_cls = self.head(x[:, 0])\n        if not self.return_dense:\n            return x_cls\n\n        return x_cls\n
"},{"location":"reference/models/#mindcv.models.volo.volo_d1","title":"mindcv.models.volo.volo_d1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

VOLO-D1 model, Params: 27M. --layers: [x,x,x,x], four blocks in two stages; the first stage (block) is an outlooker and the other three blocks are transformers. We use four blocks so the model is easily applied to downstream tasks. --embed_dims, --num_heads: embedding dimension and number of heads in each block. --downsamples: flags for whether to apply downsampling in each of the four blocks. --outlook_attention: flags for whether to apply outlook attention. --mlp_ratios: MLP ratio in each of the four blocks. --post_layers: post layers, e.g. two class attention layers specified as [ca, ca]. See the class VOLO() for details on all arguments.

Source code in mindcv/models/volo.py
@register_model\ndef volo_d1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"\n    VOLO-D1 model, Params: 27M\n    --layers: [x,x,x,x], four blocks in two stages, the first stage(block) is outlooker,\n            the other three blocks are transformer, we set four blocks, which are easily\n             applied to downstream tasks\n    --embed_dims, --num_heads,: embedding dim, number of heads in each block\n    --downsamples: flags to apply downsampling or not in four blocks\n    --outlook_attention: flags to apply outlook attention or not\n    --mlp_ratios: mlp ratio in four blocks\n    --post_layers: post layers like two class attention layers using [ca, ca]\n    See detail for all args in the class VOLO()\n    \"\"\"\n    default_cfg = default_cfgs['volo_d1']\n\n    # first block is outlooker (stage1), the other three are transformer (stage2)\n    model = VOLO(layers=[4, 4, 8, 2],\n                 in_channels=in_channels,\n                 num_classes=num_classes,\n                 embed_dims=[192, 384, 384, 384],\n                 num_heads=[6, 12, 12, 12],\n                 mlp_ratios=[3, 3, 3, 3],\n                 downsamples=[True, False, False, False],\n                 outlook_attention=[True, False, False, False],\n                 post_layers=['ca', 'ca'],\n                 **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
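A minimal usage sketch (not part of the generated reference above): instantiating VOLO-D1 and running a dummy forward pass. The 224x224 input resolution and the random input tensor are assumptions chosen only for illustration.

```python
# Sketch: build VOLO-D1 and run a dummy forward pass (input size is an assumption).
import numpy as np
import mindspore as ms
from mindcv.models.volo import volo_d1

model = volo_d1(pretrained=False, num_classes=1000)
model.set_train(False)  # inference mode

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)
print(logits.shape)  # expected: (1, 1000)
```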
"},{"location":"reference/models/#mindcv.models.volo.volo_d2","title":"mindcv.models.volo.volo_d2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

VOLO-D2 model, Params: 59M

Source code in mindcv/models/volo.py
@register_model\ndef volo_d2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"\n    VOLO-D2 model, Params: 59M\n    \"\"\"\n    default_cfg = default_cfgs['volo_d2']\n    model = VOLO(layers=[6, 4, 10, 4],\n                 in_channels=in_channels,\n                 num_classes=num_classes,\n                 embed_dims=[256, 512, 512, 512],\n                 num_heads=[8, 16, 16, 16],\n                 mlp_ratios=[3, 3, 3, 3],\n                 downsamples=[True, False, False, False],\n                 outlook_attention=[True, False, False, False],\n                 post_layers=['ca', 'ca'],\n                 **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.volo.volo_d3","title":"mindcv.models.volo.volo_d3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

VOLO-D3 model, Params: 86M

Source code in mindcv/models/volo.py
@register_model\ndef volo_d3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"\n    VOLO-D3 model, Params: 86M\n    \"\"\"\n    default_cfg = default_cfgs['volo_d3']\n    model = VOLO(layers=[8, 8, 16, 4],\n                 in_channels=in_channels,\n                 num_classes=num_classes,\n                 embed_dims=[256, 512, 512, 512],\n                 num_heads=[8, 16, 16, 16],\n                 mlp_ratios=[3, 3, 3, 3],\n                 downsamples=[True, False, False, False],\n                 outlook_attention=[True, False, False, False],\n                 post_layers=['ca', 'ca'],\n                 **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.volo.volo_d4","title":"mindcv.models.volo.volo_d4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

VOLO-D4 model, Params: 193M

Source code in mindcv/models/volo.py
@register_model\ndef volo_d4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"\n    VOLO-D4 model, Params: 193M\n    \"\"\"\n    default_cfg = default_cfgs['volo_d4']\n    model = VOLO(layers=[8, 8, 16, 4],\n                 in_channels=in_channels,\n                 num_classes=num_classes,\n                 embed_dims=[384, 768, 768, 768],\n                 num_heads=[12, 16, 16, 16],\n                 mlp_ratios=[3, 3, 3, 3],\n                 downsamples=[True, False, False, False],\n                 outlook_attention=[True, False, False, False],\n                 post_layers=['ca', 'ca'],\n                 **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#mindcv.models.volo.volo_d5","title":"mindcv.models.volo.volo_d5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

VOLO-D5 model, Params: 296M. stem_hidden_dim=128: the hidden dimension of the patch embedding is 128 for VOLO-D5.

Source code in mindcv/models/volo.py
@register_model\ndef volo_d5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):\n\"\"\"\n    VOLO-D5 model, Params: 296M\n    stem_hidden_dim=128, the dim in patch embedding is 128 for VOLO-D5\n    \"\"\"\n    default_cfg = default_cfgs['volo_d5']\n    model = VOLO(layers=[12, 12, 20, 4],\n                 embed_dims=[384, 768, 768, 768],\n                 num_heads=[12, 16, 16, 16],\n                 mlp_ratios=[4, 4, 4, 4],\n                 downsamples=[True, False, False, False],\n                 outlook_attention=[True, False, False, False],\n                 post_layers=['ca', 'ca'],\n                 stem_hidden_dim=128,\n                 **kwargs)\n    model.default_cfg = default_cfg\n\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"reference/models/#xcit","title":"xcit","text":""},{"location":"reference/models/#mindcv.models.xcit.XCiT","title":"mindcv.models.xcit.XCiT","text":"

Bases: nn.Cell

XCiT model class, based on \"XCiT: Cross-Covariance Image Transformers\" (https://arxiv.org/abs/2106.09681).

PARAMETER DESCRIPTION img_size

input image size

TYPE: (int, tuple) DEFAULT: 224

patch_size

patch size

TYPE: (int, tuple) DEFAULT: 16

in_chans

number of input channels

TYPE: int DEFAULT: 3

num_classes

number of classes for classification head

TYPE: int DEFAULT: 1000

embed_dim

embedding dimension

TYPE: int DEFAULT: 768

depth

depth of transformer

TYPE: int DEFAULT: 12

num_heads

number of attention heads

TYPE: int DEFAULT: 12

mlp_ratio

ratio of mlp hidden dim to embedding dim

TYPE: int DEFAULT: 4.0

qkv_bias

enable bias for qkv if True

TYPE: bool DEFAULT: True

qk_scale

override default qk scale of head_dim ** -0.5 if set

TYPE: float DEFAULT: None

drop_rate

dropout rate

TYPE: float DEFAULT: 0.0

attn_drop_rate

attention dropout rate

TYPE: float DEFAULT: 0.0

drop_path_rate

stochastic depth rate

TYPE: float DEFAULT: 0.0

norm_layer

normalization layer

TYPE: nn.Cell DEFAULT: None

cls_attn_layers

depth of class attention layers

TYPE: int DEFAULT: 2

use_pos

whether to use positional encoding

TYPE: bool DEFAULT: True

eta

LayerScale initialization value

TYPE: float DEFAULT: None

tokens_norm

whether to normalize all tokens or just the cls_token in the class attention blocks

TYPE: bool DEFAULT: False

Source code in mindcv/models/xcit.py
class XCiT(nn.Cell):\nr\"\"\"XCiT model class, based on\n    `\"XCiT: Cross-Covariance Image Transformers\" <https://arxiv.org/abs/2106.09681>`_\n    Args:\n        img_size (int, tuple): input image size\n        patch_size (int, tuple): patch size\n        in_chans (int): number of input channels\n        num_classes (int): number of classes for classification head\n        embed_dim (int): embedding dimension\n        depth (int): depth of transformer\n        num_heads (int): number of attention heads\n        mlp_ratio (int): ratio of mlp hidden dim to embedding dim\n        qkv_bias (bool): enable bias for qkv if True\n        qk_scale (float): override default qk scale of head_dim ** -0.5 if set\n        drop_rate (float): dropout rate\n        attn_drop_rate (float): attention dropout rate\n        drop_path_rate (float): stochastic depth rate\n        norm_layer: (nn.Module): normalization layer\n        cls_attn_layers: (int) Depth of Class attention layers\n        use_pos: (bool) whether to use positional encoding\n        eta: (float) layerscale initialization value\n        tokens_norm: (bool) Whether to normalize all tokens or just the cls_token in the CA\n    \"\"\"\n\n    def __init__(self,\n                 img_size: int = 224,\n                 patch_size: int = 16,\n                 in_chans: int = 3,\n                 num_classes: int = 1000,\n                 embed_dim: int = 768,\n                 depth: int = 12,\n                 num_heads: int = 12,\n                 mlp_ratio: int = 4.,\n                 qkv_bias: bool = True,\n                 qk_scale: float = None,\n                 drop_rate: float = 0.,\n                 attn_drop_rate: float = 0.,\n                 drop_path_rate: float = 0.,\n                 norm_layer: nn.Cell = None,\n                 cls_attn_layers: int = 2,\n                 use_pos: bool = True,\n                 patch_proj: str = 'linear',\n                 eta: float = None,\n                 tokens_norm: bool = False):\n        super().__init__()\n\n        self.num_classes = num_classes\n        self.num_features = self.embed_dim = embed_dim\n        norm_layer = norm_layer or partial(nn.LayerNorm, epsilon=1e-6)\n\n        self.patch_embed = ConvPatchEmbed(img_size=img_size, embed_dim=embed_dim,\n                                          patch_size=patch_size)\n\n        num_patches = self.patch_embed.num_patches\n\n        self.cls_token = Parameter(\n            ops.zeros((1, 1, embed_dim), mstype.float32))\n        self.pos_drop = Dropout(p=drop_rate)\n\n        dpr = [drop_path_rate for i in range(depth)]\n        self.blocks = nn.CellList([\n            XCABlock(\n                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,\n                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],\n                norm_layer=norm_layer, num_tokens=num_patches, eta=eta)\n            for i in range(depth)])\n\n        self.cls_attn_blocks = nn.CellList([\n            ClassAttentionBlock(\n                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,\n                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer,\n                eta=eta, tokens_norm=tokens_norm)\n            for i in range(cls_attn_layers)])\n        self.norm = norm_layer([embed_dim])\n        self.head = nn.Dense(\n            in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else ops.Identity()\n\n        self.pos_embeder 
= PositionalEncodingFourier(dim=embed_dim)\n        self.use_pos = use_pos\n\n        # Classifier head\n        self.cls_token.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),\n                                                        self.cls_token.shape,\n                                                        self.cls_token.dtype))\n        self._init_weights()\n\n    def _init_weights(self) -> None:\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Dense):\n                m.weight = weight_init.initializer(weight_init.TruncatedNormal(\n                    sigma=0.02), m.weight.shape, mindspore.float32)\n                if m.bias is not None:\n                    m.bias.set_data(weight_init.initializer(\n                        weight_init.Constant(0), m.bias.shape))\n            elif isinstance(m, nn.LayerNorm):\n                m.beta.set_data(weight_init.initializer(\n                    weight_init.Constant(0), m.beta.shape))\n                m.gamma.set_data(weight_init.initializer(\n                    weight_init.Constant(1), m.gamma.shape))\n\n    def forward_features(self, x):\n        B, C, H, W = x.shape\n        x, (Hp, Wp) = self.patch_embed(x)\n        if self.use_pos:\n            pos_encoding = self.pos_embeder(B, Hp, Wp).reshape(\n                B, -1, x.shape[1]).transpose(0, 2, 1)\n            x = x + pos_encoding\n        x = self.pos_drop(x)\n        for blk in self.blocks:\n            x = blk(x, Hp, Wp)\n        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))\n        cls_tokens = ops.cast(cls_tokens, x.dtype)\n        x = ops.concat((cls_tokens, x), 1)\n\n        for blk in self.cls_attn_blocks:\n            x = blk(x, Hp, Wp)\n        return self.norm(x)[:, 0]\n\n    def construct(self, x):\n        x = self.forward_features(x)\n        x = self.head(x)\n        return x\n
"},{"location":"reference/models/#mindcv.models.xcit.xcit_tiny_12_p16_224","title":"mindcv.models.xcit.xcit_tiny_12_p16_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)","text":"

Get the xcit_tiny_12_p16_224 model. Refer to the base class models.XCiT for more details.

Source code in mindcv/models/xcit.py
@register_model\ndef xcit_tiny_12_p16_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> XCiT:\n\"\"\"Get xcit_tiny_12_p16_224 model.\n    Refer to the base class 'models.XCiT' for more details.\n    \"\"\"\n    default_cfg = default_cfgs['xcit_tiny_12_p16_224']\n    model = XCiT(\n        patch_size=16, num_classes=num_classes, embed_dim=192, depth=12, num_heads=4, mlp_ratio=4, qkv_bias=True,\n        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), eta=1.0, tokens_norm=True, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg,\n                        num_classes=num_classes, in_channels=in_channels)\n\n    return model\n
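A minimal usage sketch, assuming the default 224x224 input size: the registered constructor can be called directly and fed a dummy batch.

```python
# Sketch: instantiate XCiT-Tiny-12/16 and classify a dummy 224x224 image batch.
import numpy as np
import mindspore as ms
from mindcv.models.xcit import xcit_tiny_12_p16_224

model = xcit_tiny_12_p16_224(pretrained=False, num_classes=1000)
model.set_train(False)

x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(model(x).shape)  # expected: (2, 1000)
```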
"},{"location":"reference/optim/","title":"Optimizer","text":""},{"location":"reference/optim/#optimizer-factory","title":"Optimizer Factory","text":""},{"location":"reference/optim/#mindcv.optim.optim_factory.create_optimizer","title":"mindcv.optim.optim_factory.create_optimizer(params, opt='adam', lr=0.001, weight_decay=0, momentum=0.9, nesterov=False, filter_bias_and_bn=True, loss_scale=1.0, schedule_decay=0.004, checkpoint_path='', eps=1e-10, **kwargs)","text":"

Creates optimizer by name.

PARAMETER DESCRIPTION params

network parameters, of type Union[list[Parameter], list[dict]]: either a list of parameters or a list of dicts. When a list element is a dictionary, its keys can be \"params\", \"lr\", \"weight_decay\", \"grad_centralization\" and \"order_params\".

opt

wrapped optimizer name. Options include 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion', 'rmsprop', 'adagrad' and 'lamb'. 'adam' is the default choice for convolution-based networks, while 'adamw' is recommended for ViT-based networks. Default: 'adam'.

TYPE: str DEFAULT: 'adam'

lr

learning rate: a float or an lr scheduler. Both fixed and dynamic learning rates are supported. Default: 1e-3.

TYPE: Optional[float] DEFAULT: 0.001

weight_decay

weight decay factor. Note that weight decay can be a constant value or a Cell; it is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to dynamic learning rate: users customize a weight decay schedule with only the global step as input, and during training the optimizer calls this WeightDecaySchedule instance to get the weight decay value for the current step. Default: 0.

TYPE: float DEFAULT: 0

momentum

momentum if the optimizer supports. Default: 0.9.

TYPE: float DEFAULT: 0.9

nesterov

Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.

TYPE: bool DEFAULT: False

filter_bias_and_bn

whether to filter batch norm parameters and bias from weight decay. If True, weight decay is not applied to BN parameters or to the bias in Conv or Dense layers. Default: True.

TYPE: bool DEFAULT: True

loss_scale

A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0.

TYPE: float DEFAULT: 1.0

RETURNS DESCRIPTION

Optimizer object

Source code in mindcv/optim/optim_factory.py
def create_optimizer(\n    params,\n    opt: str = \"adam\",\n    lr: Optional[float] = 1e-3,\n    weight_decay: float = 0,\n    momentum: float = 0.9,\n    nesterov: bool = False,\n    filter_bias_and_bn: bool = True,\n    loss_scale: float = 1.0,\n    schedule_decay: float = 4e-3,\n    checkpoint_path: str = \"\",\n    eps: float = 1e-10,\n    **kwargs,\n):\nr\"\"\"Creates optimizer by name.\n\n    Args:\n        params: network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters\n            or list of dicts. When the list element is a dictionary, the key of the dictionary can be\n            \"params\", \"lr\", \"weight_decay\",\"grad_centralization\" and \"order_params\".\n        opt: wrapped optimizer. You could choose like 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion',\n            'rmsprop', 'adagrad', 'lamb'. 'adam' is the default choose for convolution-based networks.\n            'adamw' is recommended for ViT-based networks. Default: 'adam'.\n        lr: learning rate: float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3.\n        weight_decay: weight decay factor. It should be noted that weight decay can be a constant value or a Cell.\n            It is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to\n            dynamic learning rate, users need to customize a weight decay schedule only with global step as input,\n            and during training, the optimizer calls the instance of WeightDecaySchedule to get the weight decay value\n            of current step. Default: 0.\n        momentum: momentum if the optimizer supports. Default: 0.9.\n        nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.\n        filter_bias_and_bn: whether to filter batch norm parameters and bias from weight decay.\n            If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True.\n        loss_scale: A floating point value for the loss scale, which must be larger than 0.0. 
Default: 1.0.\n\n    Returns:\n        Optimizer object\n    \"\"\"\n\n    opt = opt.lower()\n\n    if weight_decay and filter_bias_and_bn:\n        params = init_group_params(params, weight_decay)\n\n    opt_args = dict(**kwargs)\n    # if lr is not None:\n    #    opt_args.setdefault('lr', lr)\n\n    # non-adaptive: SGD, momentum, and nesterov\n    if opt == \"sgd\":\n        # note: nn.Momentum may perform better if momentum > 0.\n        optimizer = nn.SGD(\n            params=params,\n            learning_rate=lr,\n            momentum=momentum,\n            weight_decay=weight_decay,\n            nesterov=nesterov,\n            loss_scale=loss_scale,\n            **opt_args,\n        )\n    elif opt in [\"momentum\", \"nesterov\"]:\n        optimizer = nn.Momentum(\n            params=params,\n            learning_rate=lr,\n            momentum=momentum,\n            weight_decay=weight_decay,\n            use_nesterov=nesterov,\n            loss_scale=loss_scale,\n        )\n    # adaptive\n    elif opt == \"adam\":\n        optimizer = nn.Adam(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            use_nesterov=nesterov,\n            **opt_args,\n        )\n    elif opt == \"adamw\":\n        optimizer = AdamW(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            **opt_args,\n        )\n    elif opt == \"lion\":\n        optimizer = Lion(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            **opt_args,\n        )\n    elif opt == \"nadam\":\n        optimizer = NAdam(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            schedule_decay=schedule_decay,\n            **opt_args,\n        )\n    elif opt == \"adan\":\n        optimizer = Adan(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            **opt_args,\n        )\n    elif opt == \"rmsprop\":\n        optimizer = nn.RMSProp(\n            params=params,\n            learning_rate=lr,\n            momentum=momentum,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            epsilon=eps,\n            **opt_args,\n        )\n    elif opt == \"adagrad\":\n        optimizer = nn.Adagrad(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            loss_scale=loss_scale,\n            **opt_args,\n        )\n    elif opt == \"lamb\":\n        assert loss_scale == 1.0, \"Loss scaler is not supported by Lamb optimizer\"\n        optimizer = nn.Lamb(\n            params=params,\n            learning_rate=lr,\n            weight_decay=weight_decay,\n            **opt_args,\n        )\n    else:\n        raise ValueError(f\"Invalid optimizer: {opt}\")\n\n    if os.path.exists(checkpoint_path):\n        param_dict = load_checkpoint(checkpoint_path)\n        load_param_into_net(optimizer, param_dict)\n\n    return optimizer\n
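A short usage sketch of the factory; the tiny network and the hyperparameter values are placeholders for illustration.

```python
# Sketch: build an AdamW optimizer for a model's trainable parameters.
from mindspore import nn
from mindcv.optim.optim_factory import create_optimizer

net = nn.Dense(16, 4)  # placeholder network; any nn.Cell works
optimizer = create_optimizer(
    net.trainable_params(),
    opt="adamw",
    lr=1e-3,            # a fixed float, or a list of per-step lrs from create_scheduler
    weight_decay=0.05,  # skipped for BN parameters and biases when filter_bias_and_bn=True
)
```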
"},{"location":"reference/optim/#adamw","title":"AdamW","text":""},{"location":"reference/optim/#mindcv.optim.adamw.AdamW","title":"mindcv.optim.adamw.AdamW","text":"

Bases: Optimizer

Implements the AdamWeightDecay optimizer with optional gradient clipping by global norm.

Source code in mindcv/optim/adamw.py
class AdamW(Optimizer):\n\"\"\"\n    Implements the gradient clipping by norm for a AdamWeightDecay optimizer.\n    \"\"\"\n\n    @opt_init_args_register\n    def __init__(\n        self,\n        params,\n        learning_rate=1e-3,\n        beta1=0.9,\n        beta2=0.999,\n        eps=1e-8,\n        weight_decay=0.0,\n        loss_scale=1.0,\n        clip=False,\n    ):\n        super().__init__(learning_rate, params, weight_decay)\n        _check_param_value(beta1, beta2, eps, self.cls_name)\n        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))\n        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))\n        self.eps = Tensor(np.array([eps]).astype(np.float32))\n        self.moments1 = self.parameters.clone(prefix=\"adam_m\", init=\"zeros\")\n        self.moments2 = self.parameters.clone(prefix=\"adam_v\", init=\"zeros\")\n        self.hyper_map = ops.HyperMap()\n        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name=\"beta1_power\")\n        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name=\"beta2_power\")\n\n        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)\n        self.clip = clip\n\n    def construct(self, gradients):\n        lr = self.get_lr()\n        gradients = scale_grad(gradients, self.reciprocal_scale)\n        if self.clip:\n            gradients = ops.clip_by_global_norm(gradients, 5.0, None)\n\n        beta1_power = self.beta1_power * self.beta1\n        self.beta1_power = beta1_power\n        beta2_power = self.beta2_power * self.beta2\n        self.beta2_power = beta2_power\n\n        if self.is_group:\n            if self.is_group_lr:\n                optim_result = self.hyper_map(\n                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps),\n                    lr,\n                    self.weight_decay,\n                    self.parameters,\n                    self.moments1,\n                    self.moments2,\n                    gradients,\n                    self.decay_flags,\n                    self.optim_filter,\n                )\n            else:\n                optim_result = self.hyper_map(\n                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),\n                    self.weight_decay,\n                    self.parameters,\n                    self.moments1,\n                    self.moments2,\n                    gradients,\n                    self.decay_flags,\n                    self.optim_filter,\n                )\n        else:\n            optim_result = self.hyper_map(\n                ops.partial(\n                    _adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr, self.weight_decay\n                ),\n                self.parameters,\n                self.moments1,\n                self.moments2,\n                gradients,\n                self.decay_flags,\n                self.optim_filter,\n            )\n        if self.use_parallel:\n            self.broadcast_params(optim_result)\n        return optim_result\n
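For reference, the optimizer can also be constructed directly; the clip=True flag enables the global-norm gradient clipping shown in construct. The tiny network and the hyperparameter values below are placeholders.

```python
# Sketch: use AdamW directly with gradient clipping enabled.
from mindspore import nn
from mindcv.optim.adamw import AdamW

net = nn.Dense(16, 4)  # placeholder network
optimizer = AdamW(
    net.trainable_params(),
    learning_rate=1e-3,
    weight_decay=0.05,
    clip=True,  # clip gradients to a global norm of 5.0, as in construct()
)
```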
"},{"location":"reference/optim/#adan","title":"Adan","text":""},{"location":"reference/optim/#mindcv.optim.adan.Adan","title":"mindcv.optim.adan.Adan","text":"

Bases: Optimizer

The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677

Note: it is an experimental version.

Source code in mindcv/optim/adan.py
class Adan(Optimizer):\n\"\"\"\n    The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677\n\n    Note: it is an experimental version.\n    \"\"\"\n\n    @opt_init_args_register\n    def __init__(\n        self,\n        params,\n        learning_rate=1e-3,\n        beta1=0.98,\n        beta2=0.92,\n        beta3=0.99,\n        eps=1e-8,\n        use_locking=False,\n        weight_decay=0.0,\n        loss_scale=1.0,\n    ):\n        super().__init__(\n            learning_rate, params, weight_decay=weight_decay, loss_scale=loss_scale\n        )  # Optimized inherit weight decay is bloaked. weight decay is computed in this py.\n\n        _check_param_value(beta1, beta2, eps, self.cls_name)\n        assert isinstance(use_locking, bool), f\"For {self.cls_name}, use_looking should be bool\"\n\n        self.beta1 = Tensor(beta1, mstype.float32)\n        self.beta2 = Tensor(beta2, mstype.float32)\n        self.beta3 = Tensor(beta3, mstype.float32)\n\n        self.eps = Tensor(eps, mstype.float32)\n        self.use_locking = use_locking\n        self.moment1 = self._parameters.clone(prefix=\"moment1\", init=\"zeros\")  # m\n        self.moment2 = self._parameters.clone(prefix=\"moment2\", init=\"zeros\")  # v\n        self.moment3 = self._parameters.clone(prefix=\"moment3\", init=\"zeros\")  # n\n        self.prev_gradient = self._parameters.clone(prefix=\"prev_gradient\", init=\"zeros\")\n\n        self.weight_decay = Tensor(weight_decay, mstype.float32)\n\n    def construct(self, gradients):\n        params = self._parameters\n        moment1 = self.moment1\n        moment2 = self.moment2\n        moment3 = self.moment3\n\n        gradients = self.flatten_gradients(gradients)\n        gradients = self.gradients_centralization(gradients)\n        gradients = self.scale_grad(gradients)\n        gradients = self._grad_sparse_indices_deduplicate(gradients)\n        lr = self.get_lr()\n\n        # TODO: currently not support dist\n        success = self.map_(\n            ops.partial(_adan_opt, self.beta1, self.beta2, self.beta3, self.eps, lr, self.weight_decay),\n            params,\n            moment1,\n            moment2,\n            moment3,\n            gradients,\n            self.prev_gradient,\n        )\n\n        return success\n\n    @Optimizer.target.setter\n    def target(self, value):\n\"\"\"\n        If the input value is set to \"CPU\", the parameters will be updated on the host using the Fused\n        optimizer operation.\n        \"\"\"\n        self._set_base_target(value)\n
"},{"location":"reference/optim/#mindcv.optim.adan.Adan.target","title":"mindcv.optim.adan.Adan.target(value)","text":"

If the input value is set to \"CPU\", the parameters will be updated on the host using the Fused optimizer operation.

Source code in mindcv/optim/adan.py
@Optimizer.target.setter\ndef target(self, value):\n\"\"\"\n    If the input value is set to \"CPU\", the parameters will be updated on the host using the Fused\n    optimizer operation.\n    \"\"\"\n    self._set_base_target(value)\n
"},{"location":"reference/optim/#lion","title":"Lion","text":""},{"location":"reference/optim/#mindcv.optim.lion.Lion","title":"mindcv.optim.lion.Lion","text":"

Bases: Optimizer

Implementation of the Lion optimizer from the paper https://arxiv.org/abs/2302.06675. This implementation additionally supports gradient clipping.

Notes: lr is usually 3-10x smaller than for adamw; weight decay is usually 3-10x larger than for adamw.

Source code in mindcv/optim/lion.py
class Lion(Optimizer):\n\"\"\"\n    Implementation of Lion optimizer from paper 'https://arxiv.org/abs/2302.06675'.\n    Additionally, this implementation is with gradient clipping.\n\n    Notes:\n    lr is usually 3-10x smaller than adamw.\n    weight decay is usually 3-10x larger than adamw.\n    \"\"\"\n\n    @opt_init_args_register\n    def __init__(\n        self,\n        params,\n        learning_rate=2e-4,\n        beta1=0.9,\n        beta2=0.99,\n        weight_decay=0.0,\n        loss_scale=1.0,\n        clip=False,\n    ):\n        super().__init__(learning_rate, params, weight_decay)\n        _check_param_value(beta1, beta2, self.cls_name)\n        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))\n        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))\n        self.moments1 = self.parameters.clone(prefix=\"lion_m\", init=\"zeros\")\n        self.hyper_map = ops.HyperMap()\n        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name=\"beta1_power\")\n        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name=\"beta2_power\")\n\n        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)\n        self.clip = clip\n\n    def construct(self, gradients):\n        lr = self.get_lr()\n        gradients = scale_grad(gradients, self.reciprocal_scale)\n        if self.clip:\n            gradients = ops.clip_by_global_norm(gradients, 5.0, None)\n\n        beta1_power = self.beta1_power * self.beta1\n        self.beta1_power = beta1_power\n        beta2_power = self.beta2_power * self.beta2\n        self.beta2_power = beta2_power\n\n        if self.is_group:\n            if self.is_group_lr:\n                optim_result = self.hyper_map(\n                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2),\n                    lr,\n                    self.weight_decay,\n                    self.parameters,\n                    self.moments1,\n                    gradients,\n                    self.decay_flags,\n                    self.optim_filter,\n                )\n            else:\n                optim_result = self.hyper_map(\n                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr),\n                    self.weight_decay,\n                    self.parameters,\n                    self.moments1,\n                    gradients,\n                    self.decay_flags,\n                    self.optim_filter,\n                )\n        else:\n            optim_result = self.hyper_map(\n                ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr, self.weight_decay),\n                self.parameters,\n                self.moments1,\n                gradients,\n                self.decay_flags,\n                self.optim_filter,\n            )\n        if self.use_parallel:\n            self.broadcast_params(optim_result)\n        return optim_result\n
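To make the note above concrete, here is a hedged sketch contrasting AdamW and Lion hyperparameters through the optimizer factory; the 10x scaling is just one point in the suggested 3-10x range, and the network is a placeholder.

```python
# Sketch: AdamW vs. Lion hyperparameters following the 3-10x rule of thumb above.
from mindspore import nn
from mindcv.optim.optim_factory import create_optimizer

net = nn.Dense(16, 4)  # placeholder network

adamw_opt = create_optimizer(net.trainable_params(), opt="adamw", lr=1e-3, weight_decay=0.05)
lion_opt = create_optimizer(net.trainable_params(), opt="lion", lr=1e-4, weight_decay=0.5)
```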
"},{"location":"reference/optim/#nadam","title":"NAdam","text":""},{"location":"reference/optim/#mindcv.optim.nadam.NAdam","title":"mindcv.optim.nadam.NAdam","text":"

Bases: Optimizer

Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).

Source code in mindcv/optim/nadam.py
class NAdam(Optimizer):\n\"\"\"\n    Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).\n    \"\"\"\n\n    @opt_init_args_register\n    def __init__(\n        self,\n        params,\n        learning_rate=2e-3,\n        beta1=0.9,\n        beta2=0.999,\n        eps=1e-8,\n        weight_decay=0.0,\n        loss_scale=1.0,\n        schedule_decay=4e-3,\n    ):\n        super().__init__(learning_rate, params, weight_decay, loss_scale)\n        _check_param_value(beta1, beta2, eps, self.cls_name)\n        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))\n        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))\n        self.eps = Tensor(np.array([eps]).astype(np.float32))\n        self.moments1 = self.parameters.clone(prefix=\"nadam_m\", init=\"zeros\")\n        self.moments2 = self.parameters.clone(prefix=\"nadam_v\", init=\"zeros\")\n        self.schedule_decay = Tensor(np.array([schedule_decay]).astype(np.float32))\n        self.mu_schedule = Parameter(initializer(1, [1], ms.float32), name=\"mu_schedule\")\n        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name=\"beta2_power\")\n\n    def construct(self, gradients):\n        lr = self.get_lr()\n        params = self.parameters\n        step = self.global_step + _scaler_one\n        gradients = self.decay_weight(gradients)\n        mu = self.beta1 * (\n            _scaler_one - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), step * self.schedule_decay)\n        )\n        mu_next = self.beta1 * (\n            _scaler_one\n            - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), (step + _scaler_one) * self.schedule_decay)\n        )\n        mu_schedule = self.mu_schedule * mu\n        mu_schedule_next = self.mu_schedule * mu * mu_next\n        self.mu_schedule = mu_schedule\n        beta2_power = self.beta2_power * self.beta2\n        self.beta2_power = beta2_power\n\n        num_params = len(params)\n        for i in range(num_params):\n            ops.assign(self.moments1[i], self.beta1 * self.moments1[i] + (_scaler_one - self.beta1) * gradients[i])\n            ops.assign(\n                self.moments2[i], self.beta2 * self.moments2[i] + (_scaler_one - self.beta2) * ops.square(gradients[i])\n            )\n\n            regulate_m = mu_next * self.moments1[i] / (_scaler_one - mu_schedule_next) + (_scaler_one - mu) * gradients[\n                i\n            ] / (_scaler_one - mu_schedule)\n            regulate_v = self.moments2[i] / (_scaler_one - beta2_power)\n\n            update = params[i] - lr * regulate_m / (self.eps + ops.sqrt(regulate_v))\n            ops.assign(params[i], update)\n\n        return params\n
"},{"location":"reference/scheduler/","title":"Learning Rate Scheduler","text":""},{"location":"reference/scheduler/#scheduler-factory","title":"Scheduler Factory","text":""},{"location":"reference/scheduler/#mindcv.scheduler.scheduler_factory.create_scheduler","title":"mindcv.scheduler.scheduler_factory.create_scheduler(steps_per_epoch, scheduler='constant', lr=0.01, min_lr=1e-06, warmup_epochs=3, warmup_factor=0.0, decay_epochs=10, decay_rate=0.9, milestones=None, num_epochs=200, num_cycles=1, cycle_decay=1.0, lr_epoch_stair=False)","text":"

Creates learning rate scheduler by name.

PARAMETER DESCRIPTION steps_per_epoch

number of steps per epoch.

TYPE: int

scheduler

scheduler name like 'constant', 'cosine_decay', 'step_decay', 'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.

TYPE: str DEFAULT: 'constant'

lr

learning rate value. Default: 0.01.

TYPE: float DEFAULT: 0.01

min_lr

lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.

TYPE: float DEFAULT: 1e-06

warmup_epochs

number of epochs to warm up the LR, if the scheduler supports warmup. Default: 3.

TYPE: int DEFAULT: 3

warmup_factor

the warmup phase of the scheduler is a linearly increasing lr; the starting factor is warmup_factor, i.e., the lr of the first step/epoch is lr*warmup_factor, and the lr at the end of the warmup phase is lr. Default: 0.0.

TYPE: float DEFAULT: 0.0

decay_epochs

for 'cosine_decay' schedulers, decay LR to min_lr in decay_epochs. For 'step_decay' scheduler, decay LR by a factor of decay_rate every decay_epochs. Default: 10.

TYPE: int DEFAULT: 10

decay_rate

LR decay rate. Default: 0.9.

TYPE: float DEFAULT: 0.9

milestones

list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None

TYPE: list DEFAULT: None

num_epochs

Number of total epochs. Default: 200.

TYPE: int DEFAULT: 200

num_cycles

Number of cycles for cosine decay and cyclic. Default: 1.

TYPE: int DEFAULT: 1

cycle_decay

Decay rate of lr max in each cosine cycle. Default: 1.0.

TYPE: float DEFAULT: 1.0

lr_epoch_stair

If True, the LR is updated at the beginning of each epoch and stays constant for every batch within that epoch. Otherwise, the learning rate is updated dynamically at each step. Default: False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION

Cell object for computing LR with input of current global steps

Source code in mindcv/scheduler/scheduler_factory.py
def create_scheduler(\n    steps_per_epoch: int,\n    scheduler: str = \"constant\",\n    lr: float = 0.01,\n    min_lr: float = 1e-6,\n    warmup_epochs: int = 3,\n    warmup_factor: float = 0.0,\n    decay_epochs: int = 10,\n    decay_rate: float = 0.9,\n    milestones: list = None,\n    num_epochs: int = 200,\n    num_cycles: int = 1,\n    cycle_decay: float = 1.0,\n    lr_epoch_stair: bool = False,\n):\nr\"\"\"Creates learning rate scheduler by name.\n\n    Args:\n        steps_per_epoch: number of steps per epoch.\n        scheduler: scheduler name like 'constant', 'cosine_decay', 'step_decay',\n            'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.\n        lr: learning rate value. Default: 0.01.\n        min_lr: lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.\n        warmup_epochs: epochs to warmup LR, if scheduler supports. Default: 3.\n        warmup_factor: the warmup phase of scheduler is a linearly increasing lr,\n            the beginning factor is `warmup_factor`, i.e., the lr of the first step/epoch is lr*warmup_factor,\n            and the ending lr in the warmup phase is lr. Default: 0.0\n        decay_epochs: for 'cosine_decay' schedulers, decay LR to min_lr in `decay_epochs`.\n            For 'step_decay' scheduler, decay LR by a factor of `decay_rate` every `decay_epochs`. Default: 10.\n        decay_rate: LR decay rate. Default: 0.9.\n        milestones: list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None\n        num_epochs: Number of total epochs. Default: 200.\n        num_cycles: Number of cycles for cosine decay and cyclic. Default: 1.\n        cycle_decay: Decay rate of lr max in each cosine cycle. Default: 1.0.\n        lr_epoch_stair: If True, LR will be updated in the beginning of each new epoch\n            and the LR will be consistent for each batch in one epoch.\n            Otherwise, learning rate will be updated dynamically in each step. Default: False.\n    Returns:\n        Cell object for computing LR with input of current global steps\n    \"\"\"\n    # check params\n    if milestones is None:\n        milestones = []\n\n    if warmup_epochs + decay_epochs > num_epochs:\n        _logger.warning(\"warmup_epochs + decay_epochs > num_epochs. Please check and reduce decay_epochs!\")\n\n    # lr warmup phase\n    warmup_lr_scheduler = []\n    if warmup_epochs > 0:\n        if warmup_factor == 0 and lr_epoch_stair:\n            _logger.warning(\n                \"The warmup factor is set to 0, lr of 0-th epoch is always zero! 
\" \"Recommend value is 0.01.\"\n            )\n        warmup_func = linear_lr if lr_epoch_stair else linear_refined_lr\n        warmup_lr_scheduler = warmup_func(\n            start_factor=warmup_factor,\n            end_factor=1.0,\n            total_iters=warmup_epochs,\n            lr=lr,\n            steps_per_epoch=steps_per_epoch,\n            epochs=warmup_epochs,\n        )\n\n    # lr decay phase\n    main_epochs = num_epochs - warmup_epochs\n    if scheduler in [\"cosine_decay\", \"warmup_cosine_decay\"]:\n        cosine_func = cosine_decay_lr if lr_epoch_stair else cosine_decay_refined_lr\n        main_lr_scheduler = cosine_func(\n            decay_epochs=decay_epochs,\n            eta_min=min_lr,\n            eta_max=lr,\n            steps_per_epoch=steps_per_epoch,\n            epochs=main_epochs,\n            num_cycles=num_cycles,\n            cycle_decay=cycle_decay,\n        )\n    elif scheduler == \"one_cycle\":\n        if lr_epoch_stair or warmup_epochs > 0:\n            raise ValueError(\n                \"OneCycle scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0.\"\n            )\n        div_factor = 25.0\n        initial_lr = lr / div_factor\n        final_div_factor = initial_lr / min_lr\n        main_lr_scheduler = one_cycle_lr(\n            max_lr=lr,\n            final_div_factor=final_div_factor,\n            steps_per_epoch=steps_per_epoch,\n            epochs=main_epochs,\n        )\n    elif scheduler == \"cyclic\":\n        if lr_epoch_stair or warmup_epochs > 0:\n            raise ValueError(\"Cyclic scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0.\")\n        num_steps = steps_per_epoch * main_epochs\n        step_size_up = int(num_steps / num_cycles / 2)\n        main_lr_scheduler = cyclic_lr(\n            base_lr=min_lr,\n            max_lr=lr,\n            step_size_up=step_size_up,\n            steps_per_epoch=steps_per_epoch,\n            epochs=main_epochs,\n        )\n    elif scheduler == \"exponential_decay\":\n        exponential_func = exponential_lr if lr_epoch_stair else exponential_refined_lr\n        main_lr_scheduler = exponential_func(\n            gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs\n        )\n    elif scheduler == \"polynomial_decay\":\n        polynomial_func = polynomial_lr if lr_epoch_stair else polynomial_refined_lr\n        main_lr_scheduler = polynomial_func(\n            total_iters=main_epochs, power=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs\n        )\n    elif scheduler == \"step_decay\":\n        main_lr_scheduler = step_lr(\n            step_size=decay_epochs, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs\n        )\n    elif scheduler == \"multi_step_decay\":\n        main_lr_scheduler = multi_step_lr(\n            milestones=milestones, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs\n        )\n    elif scheduler == \"constant\":\n        main_lr_scheduler = [lr for _ in range(steps_per_epoch * main_epochs)]\n    else:\n        raise ValueError(f\"Invalid scheduler: {scheduler}\")\n\n    # combine\n    lr_scheduler = warmup_lr_scheduler + main_lr_scheduler\n\n    return lr_scheduler\n
"},{"location":"reference/scheduler/#mindcv.scheduler.dynamic_lr","title":"mindcv.scheduler.dynamic_lr","text":"

Meta learning rate scheduler.

This module implements exactly the same learning rate schedulers as native PyTorch; see \"torch.optim.lr_scheduler\" (https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate). At present, only constant_lr, linear_lr, polynomial_lr, exponential_lr, step_lr, multi_step_lr, cosine_annealing_lr, cosine_annealing_warm_restarts_lr, one_cycle_lr and cyclic_lr are implemented. The number, names and usage of the positional arguments are exactly the same as in native PyTorch.

However, because the learning rate must be returned explicitly for every step, three additional keyword arguments are introduced, namely lr, steps_per_epoch and epochs: lr is the base learning rate that would be passed when creating the optimizer in torch; steps_per_epoch is the number of steps (iterations) in each epoch; epochs is the number of epochs, which together with steps_per_epoch determines the length of the returned list of lrs.

Among all schedulers, one_cycle_lr and cyclic_lr need only the two keyword arguments other than lr, since the lr argument passed when creating the optimizer in torch has no effect for these two schedulers.

Most schedulers in PyTorch are coarse-grained, that is, the learning rate is constant within a single epoch. For the non-stepwise schedulers, we introduce fine-grained variants in which the learning rate also changes within a single epoch. The function names of these variants contain the keyword refined. The implemented fine-grained variants are: linear_refined_lr, polynomial_refined_lr, etc.

"},{"location":"reference/scheduler/#mindcv.scheduler.dynamic_lr.cosine_decay_lr","title":"mindcv.scheduler.dynamic_lr.cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0)","text":"

Cosine decay schedule; the learning rate is updated once per epoch.

Source code in mindcv/scheduler/dynamic_lr.py
def cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):\n\"\"\"update every epoch\"\"\"\n    tot_steps = steps_per_epoch * epochs\n    lrs = []\n\n    for c in range(num_cycles):\n        lr_max = eta_max * (cycle_decay**c)\n        delta = 0.5 * (lr_max - eta_min)\n        for i in range(steps_per_epoch * decay_epochs):\n            t_cur = math.floor(i / steps_per_epoch)\n            t_cur = min(t_cur, decay_epochs)\n            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))\n            if len(lrs) < tot_steps:\n                lrs.append(lr_cur)\n            else:\n                break\n\n    if epochs > num_cycles * decay_epochs:\n        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):\n            lrs.append(eta_min)\n\n    return lrs\n
"},{"location":"reference/scheduler/#mindcv.scheduler.dynamic_lr.cosine_decay_refined_lr","title":"mindcv.scheduler.dynamic_lr.cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0)","text":"

Cosine decay schedule; the learning rate is updated at every step.

Source code in mindcv/scheduler/dynamic_lr.py
def cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):\n\"\"\"update every step\"\"\"\n    tot_steps = steps_per_epoch * epochs\n    lrs = []\n\n    for c in range(num_cycles):\n        lr_max = eta_max * (cycle_decay**c)\n        delta = 0.5 * (lr_max - eta_min)\n        for i in range(steps_per_epoch * decay_epochs):\n            t_cur = i / steps_per_epoch\n            t_cur = min(t_cur, decay_epochs)\n            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))\n            if len(lrs) < tot_steps:\n                lrs.append(lr_cur)\n            else:\n                break\n\n    if epochs > num_cycles * decay_epochs:\n        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):\n            lrs.append(eta_min)\n\n    return lrs\n
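A small numerical sketch of the difference between the coarse-grained and refined variants; the tiny epoch and step counts are chosen only to keep the printed lists short.

```python
# Sketch: compare per-epoch vs. per-step cosine decay on a toy 2-epoch schedule.
from mindcv.scheduler.dynamic_lr import cosine_decay_lr, cosine_decay_refined_lr

common = dict(eta_max=1.0, steps_per_epoch=2, epochs=2)
print(cosine_decay_lr(2, 0.0, **common))
# [1.0, 1.0, 0.5, 0.5]             -> lr changes only at epoch boundaries
print(cosine_decay_refined_lr(2, 0.0, **common))
# approx [1.0, 0.854, 0.5, 0.146]  -> lr also changes within an epoch
```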
"},{"location":"reference/scheduler/#mindcv.scheduler.dynamic_lr.cyclic_lr","title":"mindcv.scheduler.dynamic_lr.cyclic_lr(base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', *, steps_per_epoch, epochs)","text":"

Cyclic learning rate scheduler based on \"Cyclical Learning Rates for Training Neural Networks\" (https://arxiv.org/abs/1506.01186).

PARAMETER DESCRIPTION base_lr

Lower learning rate boundaries in each cycle.

TYPE: float

max_lr

Upper learning rate boundaries in each cycle.

TYPE: float

step_size_up

Number of steps in the increasing half in each cycle. Default: 2000.

TYPE: int DEFAULT: 2000

step_size_down

Number of steps in the decreasing half of each cycle. If step_size_down is None, it is set to step_size_up. Default: None.

DEFAULT: None

div_factor

Initial learning rate via initial_lr = max_lr / div_factor. Default: 25.0.

final_div_factor

Minimum learning rate at the end via min_lr = initial_lr / final_div_factor. Default: 10000.0.

mode

One of {triangular, triangular2, exp_range}. Ignored if scale_fn is not None. Default: 'triangular'.

TYPE: str DEFAULT: 'triangular'

gamma

Constant used in the 'exp_range' scaling function: gamma**(cycle_iterations). Default: 1.0

DEFAULT: 1.0

scale_fn

Custom scaling policy defined by a single-argument lambda function. If it is not None, 'mode' is ignored. Default: None

DEFAULT: None

scale_mode

One of {'cycle', 'iterations'}. Determines whether scale_fn is evaluated on the cycle number or on cycle iterations. Default: 'cycle'

DEFAULT: 'cycle'

steps_per_epoch

Number of steps per epoch.

TYPE: int

epochs

Number of total epochs.

TYPE: int

Source code in mindcv/scheduler/dynamic_lr.py
def cyclic_lr(\n    base_lr: float,\n    max_lr: float,\n    step_size_up: int = 2000,\n    step_size_down=None,\n    mode: str = \"triangular\",\n    gamma=1.0,\n    scale_fn=None,\n    scale_mode=\"cycle\",\n    *,\n    steps_per_epoch: int,\n    epochs: int,\n):\n\"\"\"\n    Cyclic learning rate scheduler based on\n    '\"Cyclical Learning Rates for Training Neural Networks\" <https://arxiv.org/abs/1708.07120>'\n\n    Args:\n        base_lr: Lower learning rate boundaries in each cycle.\n        max_lr: Upper learning rate boundaries in each cycle.\n        step_size_up: Number of steps in the increasing half in each cycle. Default: 2000.\n        step_size_down: Number of steps in the increasing half in each cycle. If step_size_down\n            is None, it's set to step_size_up. Default: None.\n        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.\n            Default: 25.0.\n        final_div_factor: Minimum learning rate at the end via\n            min_lr = initial_lr / final_div_factor. Default: 10000.0.\n        mode: One of {triangular, triangular2, exp_range}. If scale_fn is not None, it's set to\n            None. Default: 'triangular'.\n        gamma: Constant in 'exp_range' calculating fuction: gamma**(cycle_iterations).\n            Default: 1.0\n        scale_fn: Custom scaling policy defined by a single argument lambda function. If it's\n            not None, 'mode' is ignored. Default: None\n        scale_mode: One of {'cycle', 'iterations'}. Determine scale_fn is evaluated on cycle\n            number or cycle iterations. Default: 'cycle'\n        steps_per_epoch: Number of steps per epoch.\n        epochs: Number of total epochs.\n    \"\"\"\n\n    def _triangular_scale_fn(x):\n        return 1.0\n\n    def _triangular2_scale_fn(x):\n        return 1 / (2.0**(x - 1))\n\n    def _exp_range_scale_fn(x):\n        return gamma**x\n\n    steps = steps_per_epoch * epochs\n    step_size_up = float(step_size_up)\n    step_size_down = float(step_size_down) if step_size_down is not None else step_size_up\n    total_size = step_size_up + step_size_down\n    step_ratio = step_size_up / total_size\n    if scale_fn is None:\n        if mode == \"triangular\":\n            scale_fn = _triangular_scale_fn\n            scale_mode = \"cycle\"\n        elif mode == \"triangular2\":\n            scale_fn = _triangular2_scale_fn\n            scale_mode = \"cycle\"\n        elif mode == \"exp_range\":\n            scale_fn = _exp_range_scale_fn\n            scale_mode = \"iterations\"\n    lrs = []\n    for i in range(steps):\n        cycle = math.floor(1 + i / total_size)\n        x = 1.0 + i / total_size - cycle\n        if x <= step_ratio:\n            scale_factor = x / step_ratio\n        else:\n            scale_factor = (x - 1) / (step_ratio - 1)\n        base_height = (max_lr - base_lr) * scale_factor\n        if scale_mode == \"cycle\":\n            lrs.append(base_lr + base_height * scale_fn(cycle))\n        else:\n            lrs.append(base_lr + base_height * scale_fn(i))\n    return lrs\n
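A toy sketch of the default triangular policy; the small step sizes are placeholders chosen so the whole schedule fits in one short list.

```python
# Sketch: a short triangular cyclic schedule (4 steps up, 4 steps down per cycle).
from mindcv.scheduler.dynamic_lr import cyclic_lr

lrs = cyclic_lr(base_lr=0.001, max_lr=0.01, step_size_up=4, steps_per_epoch=4, epochs=4)
print(len(lrs))                                # 16 values: 4 epochs * 4 steps
print(min(lrs) >= 0.001 and max(lrs) <= 0.01)  # True: lr oscillates between base_lr and max_lr
```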
"},{"location":"reference/scheduler/#mindcv.scheduler.dynamic_lr.one_cycle_lr","title":"mindcv.scheduler.dynamic_lr.one_cycle_lr(max_lr, pct_start=0.3, anneal_strategy='cos', div_factor=25.0, final_div_factor=10000.0, three_phase=False, *, steps_per_epoch, epochs)","text":"

OneCycle learning rate scheduler based on \"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates\" (https://arxiv.org/abs/1708.07120).

PARAMETER DESCRIPTION max_lr

Upper learning rate boundaries in the cycle.

TYPE: float

pct_start

The fraction of the total number of steps spent increasing the learning rate in the cycle. Default: 0.3.

TYPE: float DEFAULT: 0.3

anneal_strategy

Define the annealing strategy: \"cos\" for cosine annealing, \"linear\" for linear annealing. Default: \"cos\".

TYPE: str DEFAULT: 'cos'

div_factor

Initial learning rate via initial_lr = max_lr / div_factor. Default: 25.0.

TYPE: float DEFAULT: 25.0

final_div_factor

Minimum learning rate at the end via min_lr = initial_lr / final_div_factor. Default: 10000.0.

TYPE: float DEFAULT: 10000.0

three_phase

If True, the learning rate is updated in three phases, with \"final_div_factor\" determining the final phase. Otherwise, the learning rate is updated in two phases. Default: False.

TYPE: bool DEFAULT: False

steps_per_epoch

Number of steps per epoch.

TYPE: int

epochs

Number of total epochs.

TYPE: int

Source code in mindcv/scheduler/dynamic_lr.py
def one_cycle_lr(\n    max_lr: float,\n    pct_start: float = 0.3,\n    anneal_strategy: str = \"cos\",\n    div_factor: float = 25.0,\n    final_div_factor: float = 10000.0,\n    three_phase: bool = False,\n    *,\n    steps_per_epoch: int,\n    epochs: int,\n):\n\"\"\"\n    OneCycle learning rate scheduler based on\n    '\"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates\"\n    <https://arxiv.org/abs/1708.07120>'\n\n    Args:\n        max_lr: Upper learning rate boundaries in the cycle.\n        pct_start: The percentage of the number of steps of increasing learning rate\n            in the cycle. Default: 0.3.\n        anneal_strategy: Define the annealing strategy: \"cos\" for cosine annealing,\n            \"linear\" for linear annealing. Default: \"cos\".\n        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.\n            Default: 25.0.\n        final_div_factor: Minimum learning rate at the end via\n            min_lr = initial_lr / final_div_factor. Default: 10000.0.\n        three_phase: If True, learning rate will be updated by three-phase according to\n            \"final_div_factor\". Otherwise, learning rate will be updated by two-phase.\n            Default: False.\n        steps_per_epoch: Number of steps per epoch.\n        epochs: Number of total epochs.\n    \"\"\"\n\n    def _annealing_cos(start, end, pct):\n        cos_out = math.cos(math.pi * pct) + 1\n        return end + (start - end) / 2.0 * cos_out\n\n    def _annealing_linear(start, end, pct):\n        return (end - start) * pct + start\n\n    initial_lr = max_lr / div_factor\n    min_lr = initial_lr / final_div_factor\n    steps = steps_per_epoch * epochs\n    step_size_up = float(pct_start * steps) - 1\n    step_size_down = float(2 * pct_start * steps) - 2\n    step_size_end = float(steps) - 1\n    if anneal_strategy == \"cos\":\n        anneal_func = _annealing_cos\n    elif anneal_strategy == \"linear\":\n        anneal_func = _annealing_linear\n    else:\n        raise ValueError(f\"anneal_strategy must be one of 'cos' or 'linear', but got {anneal_strategy}\")\n    lrs = []\n    for i in range(steps):\n        if three_phase:\n            if i <= step_size_up:\n                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))\n            elif step_size_up < i <= step_size_down:\n                lrs.append(anneal_func(max_lr, initial_lr, (i - step_size_up) / (step_size_down - step_size_up)))\n            else:\n                lrs.append(anneal_func(initial_lr, min_lr, (i - step_size_down) / (step_size_end - step_size_down)))\n        else:\n            if i <= step_size_up:\n                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))\n            else:\n                lrs.append(anneal_func(max_lr, min_lr, (i - step_size_up) / (step_size_end - step_size_up)))\n    return lrs\n
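A brief sketch showing the overall shape of the two-phase schedule; the step counts and max_lr are placeholders.

```python
# Sketch: a two-phase OneCycle schedule over 10 epochs of 100 steps each.
from mindcv.scheduler.dynamic_lr import one_cycle_lr

lrs = one_cycle_lr(max_lr=0.01, steps_per_epoch=100, epochs=10)
print(len(lrs))            # 1000 per-step learning rates
print(round(max(lrs), 4))  # 0.01, reached at the end of the increasing phase
print(lrs[0])              # 0.0004 = max_lr / div_factor
print(lrs[-1])             # ~4e-08 = initial_lr / final_div_factor
```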
"},{"location":"reference/utils/","title":"Utility","text":""},{"location":"reference/utils/#logger","title":"Logger","text":""},{"location":"reference/utils/#mindcv.utils.logger.set_logger","title":"mindcv.utils.logger.set_logger(name=None, output_dir=None, rank=0, log_level=logging.INFO, color=True)","text":"

Initialize the logger.

If the logger has not been initialized, this method initializes it by adding one or two handlers; otherwise the already-initialized logger is returned directly. During initialization, a console handler is added only to the logger of the master process. If output_dir is specified, a file handler is added to all loggers.

PARAMETER DESCRIPTION name

Logger name. Defaults to None to set up root logger.

TYPE: Optional[str] DEFAULT: None

output_dir

The directory to save log.

TYPE: Optional[str] DEFAULT: None

rank

Process rank in the distributed training. Defaults to 0.

TYPE: int DEFAULT: 0

log_level

Verbosity level of the logger. Defaults to logging.INFO.

TYPE: int DEFAULT: logging.INFO

color

If True, color the output. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION logging.Logger

logging.Logger: An initialized logger.

Source code in mindcv/utils/logger.py
def set_logger(\n    name: Optional[str] = None,\n    output_dir: Optional[str] = None,\n    rank: int = 0,\n    log_level: int = logging.INFO,\n    color: bool = True,\n) -> logging.Logger:\n\"\"\"Initialize the logger.\n\n    If the logger has not been initialized, this method will initialize the\n    logger by adding one or two handlers, otherwise the initialized logger will\n    be directly returned. During initialization, only logger of the master\n    process is added console handler. If ``output_dir`` is specified, all loggers\n    will be added file handler.\n\n    Args:\n        name: Logger name. Defaults to None to set up root logger.\n        output_dir: The directory to save log.\n        rank: Process rank in the distributed training. Defaults to 0.\n        log_level: Verbosity level of the logger. Defaults to ``logging.INFO``.\n        color: If True, color the output. Defaults to True.\n\n    Returns:\n        logging.Logger: A initialized logger.\n    \"\"\"\n    if name in logger_initialized:\n        return logger_initialized[name]\n\n    # get root logger if name is None\n    logger = logging.getLogger(name)\n    logger.setLevel(log_level)\n    # the messages of this logger will not be propagated to its parent\n    logger.propagate = False\n\n    fmt = \"%(asctime)s %(name)s %(levelname)s - %(message)s\"\n    datefmt = \"[%Y-%m-%d %H:%M:%S]\"\n\n    # create console handler for master process\n    if rank == 0:\n        if color:\n            if has_rich:\n                console_handler = RichHandler(level=log_level, log_time_format=datefmt)\n            elif has_termcolor:\n                console_handler = logging.StreamHandler(stream=sys.stdout)\n                console_handler.setLevel(log_level)\n                console_handler.setFormatter(_ColorfulFormatter(fmt=fmt, datefmt=datefmt))\n            else:\n                raise NotImplementedError(\"If you want color, 'rich' or 'termcolor' has to be installed!\")\n        else:\n            console_handler = logging.StreamHandler(stream=sys.stdout)\n            console_handler.setLevel(log_level)\n            console_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))\n        logger.addHandler(console_handler)\n\n    if output_dir is not None:\n        os.makedirs(output_dir, exist_ok=True)\n        file_handler = logging.FileHandler(os.path.join(output_dir, f\"rank{rank}.log\"))\n        file_handler.setLevel(log_level)\n        file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))\n        logger.addHandler(file_handler)\n\n    logger_initialized[name] = logger\n    return logger\n
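A minimal usage sketch of the interface above; color=False avoids the optional rich/termcolor dependency:

import logging
from mindcv.utils.logger import set_logger

# A console handler is attached only on rank 0; every rank writes rank{rank}.log
# into output_dir when it is given.
logger = set_logger(name="mindcv", output_dir="./output", rank=0,
                    log_level=logging.INFO, color=False)
logger.info("Logger initialized")  # goes to stdout and to ./output/rank0.log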
"},{"location":"reference/utils/#callbacks","title":"Callbacks","text":""},{"location":"reference/utils/#mindcv.utils.callbacks.StateMonitor","title":"mindcv.utils.callbacks.StateMonitor","text":"

Bases: Callback

Monitor of training loss and validation accuracy that, after each epoch, saves the checkpoint with the highest validation accuracy as the best checkpoint file.

Source code in mindcv/utils/callbacks.py
class StateMonitor(Callback):\n\"\"\"\n    Train loss and validation accuracy monitor, after each epoch save the\n    best checkpoint file with the highest validation accuracy.\n    \"\"\"\n\n    def __init__(\n        self,\n        model,\n        model_name=\"\",\n        model_ema=False,\n        last_epoch=0,\n        dataset_sink_mode=True,\n        dataset_val=None,\n        metric_name=(\"accuracy\",),\n        val_interval=1,\n        val_start_epoch=1,\n        save_best_ckpt=True,\n        ckpt_save_dir=\"./\",\n        ckpt_save_interval=1,\n        ckpt_save_policy=None,\n        ckpt_keep_max=10,\n        summary_dir=\"./\",\n        log_interval=100,\n        rank_id=None,\n        device_num=None,\n    ):\n        super().__init__()\n        # model\n        self.model = model\n        self.model_name = model_name\n        self.model_ema = model_ema\n        self.last_epoch = last_epoch\n        self.dataset_sink_mode = dataset_sink_mode\n        # evaluation\n        self.dataset_val = dataset_val\n        self.metric_name = metric_name\n        self.val_interval = val_interval\n        self.val_start_epoch = val_start_epoch\n        # logging\n        self.best_res = 0\n        self.best_epoch = -1\n        self.save_best_ckpt = save_best_ckpt\n        self.ckpt_save_dir = ckpt_save_dir\n        self.ckpt_save_interval = ckpt_save_interval\n        self.ckpt_save_policy = ckpt_save_policy\n        self.ckpt_keep_max = ckpt_keep_max\n        self.ckpt_manager = CheckpointManager(ckpt_save_policy=self.ckpt_save_policy)\n        self._need_flush_from_cache = True\n        self.summary_dir = summary_dir\n        self.log_interval = log_interval\n        # system\n        self.rank_id = rank_id if rank_id is not None else 0\n        self.device_num = device_num if rank_id is not None else 1\n        if self.rank_id in [0, None]:\n            os.makedirs(ckpt_save_dir, exist_ok=True)\n            self.log_file = os.path.join(ckpt_save_dir, \"result.log\")\n            log_line = \"\".join(\n                f\"{s:<20}\" for s in [\"Epoch\", \"TrainLoss\", *metric_name, \"TrainTime\", \"EvalTime\", \"TotalTime\"]\n            )\n            with open(self.log_file, \"w\", encoding=\"utf-8\") as fp:  # writing the title of result.log\n                fp.write(log_line + \"\\n\")\n        if self.device_num > 1:\n            self.all_reduce = AllReduceSum()\n        # timestamp\n        self.step_ts = None\n        self.epoch_ts = None\n        self.step_time_accum = 0\n        # model_ema\n        if self.model_ema:\n            self.hyper_map = ops.HyperMap()\n            self.online_params = ParameterTuple(self.model.train_network.get_parameters())\n            self.swap_params = self.online_params.clone(\"swap\", \"zeros\")\n\n    def __enter__(self):\n        self.summary_record = SummaryRecord(self.summary_dir)\n        return self\n\n    def __exit__(self, *exc_args):\n        self.summary_record.close()\n\n    def apply_eval(self, run_context):\n\"\"\"Model evaluation, return validation accuracy.\"\"\"\n        if self.model_ema:\n            cb_params = run_context.original_args()\n            self.hyper_map(ops.assign, self.swap_params, self.online_params)\n            ema_dict = dict()\n            net = self._get_network_from_cbp(cb_params)\n            for param in net.get_parameters():\n                if param.name.startswith(\"ema\"):\n                    new_name = param.name.split(\"ema.\")[1]\n                    ema_dict[new_name] = param.data\n            
load_param_into_net(self.model.train_network.network, ema_dict)\n            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)\n            self.hyper_map(ops.assign, self.online_params, self.swap_params)\n        else:\n            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)\n        res_array = ms.Tensor(list(res_dict.values()), ms.float32)\n        if self.device_num > 1:\n            res_array = self.all_reduce(res_array)\n            res_array /= self.device_num\n        res_array = res_array.asnumpy()\n        return res_array\n\n    def on_train_step_begin(self, run_context):\n        self.step_ts = time()\n\n    def on_train_epoch_begin(self, run_context):\n        self.epoch_ts = time()\n\n    def on_train_step_end(self, run_context):\n        cb_params = run_context.original_args()\n        num_epochs = cb_params.epoch_num\n        num_batches = cb_params.batch_num\n        # num_steps = num_batches * num_epochs\n        # cur_x start from 1, end at num_xs, range: [1, num_xs]\n        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches\n        cur_epoch = cb_params.cur_epoch_num + self.last_epoch\n        cur_batch = (cur_step - 1) % num_batches + 1\n\n        self.step_time_accum += time() - self.step_ts\n        if cur_batch % self.log_interval == 0 or cur_batch == num_batches or cur_batch == 1:\n            lr = self._get_lr_from_cbp(cb_params)\n            loss = self._get_loss_from_cbp(cb_params)\n            _logger.info(\n                f\"Epoch: [{cur_epoch}/{num_epochs}], \"\n                f\"batch: [{cur_batch}/{num_batches}], \"\n                f\"loss: {loss.asnumpy():.6f}, \"\n                f\"lr: {lr.asnumpy():.6f}, \"\n                f\"time: {self.step_time_accum:.6f}s\"\n            )\n            self.step_time_accum = 0\n\n    def on_train_epoch_end(self, run_context):\n\"\"\"\n        After epoch, print train loss and val accuracy,\n        save the best ckpt file with the highest validation accuracy.\n        \"\"\"\n        cb_params = run_context.original_args()\n        num_epochs = cb_params.epoch_num\n        num_batches = cb_params.batch_num\n        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches\n        cur_epoch = cb_params.cur_epoch_num + self.last_epoch\n        cur_batch = (cur_step - 1) % num_batches + 1\n\n        train_time = time() - self.epoch_ts\n        loss = self._get_loss_from_cbp(cb_params)\n\n        val_time = 0\n        res = np.zeros(len(self.metric_name), dtype=np.float32)\n        # val while training if validation loader is not None\n        if (\n            self.dataset_val is not None\n            and cur_epoch >= self.val_start_epoch\n            and (cur_epoch - self.val_start_epoch) % self.val_interval == 0\n        ):\n            val_time = time()\n            res = self.apply_eval(run_context)\n            val_time = time() - val_time\n            # record val acc\n            metric_str = \"Validation \"\n            for i in range(len(self.metric_name)):\n                metric_str += f\"{self.metric_name[i]}: {res[i]:.4%}, \"\n            metric_str += f\"time: {val_time:.6f}s\"\n            _logger.info(metric_str)\n            # save the best ckpt file\n            if res[0] > self.best_res:\n                self.best_res = res[0]\n                self.best_epoch = cur_epoch\n                _logger.info(f\"=> New best val acc: {res[0]:.4%}\")\n\n        # save checkpoint\n        if self.rank_id in [0, None]:\n            if 
self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc\n                best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f\"{self.model_name}_best.ckpt\")\n                save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)\n            if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):\n                if self._need_flush_from_cache:\n                    self._flush_from_cache(cb_params)\n                # save optim for resume\n                optimizer = self._get_optimizer_from_cbp(cb_params)\n                optim_save_path = os.path.join(self.ckpt_save_dir, f\"optim_{self.model_name}.ckpt\")\n                save_checkpoint(optimizer, optim_save_path, async_save=True)\n                # keep checkpoint files number equal max number.\n                ckpt_save_path = os.path.join(self.ckpt_save_dir, f\"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt\")\n                _logger.info(f\"Saving model to {ckpt_save_path}\")\n                self.ckpt_manager.save_ckpoint(\n                    cb_params.train_network,\n                    num_ckpt=self.ckpt_keep_max,\n                    metric=res[0],\n                    save_path=ckpt_save_path,\n                )\n\n        # logging\n        total_time = time() - self.epoch_ts\n        _logger.info(\n            f\"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, \"\n            f\"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s\"\n        )\n        _logger.info(\"-\" * 80)\n        if self.rank_id in [0, None]:\n            log_line = \"\".join(\n                f\"{s:<20}\"\n                for s in [\n                    f\"{cur_epoch}\",\n                    f\"{loss.asnumpy():.6f}\",\n                    *[f\"{i:.4%}\" for i in res],\n                    f\"{train_time:.2f}\",\n                    f\"{val_time:.2f}\",\n                    f\"{total_time:.2f}\",\n                ]\n            )\n            with open(self.log_file, \"a\", encoding=\"utf-8\") as fp:\n                fp.write(log_line + \"\\n\")\n\n        # summary\n        self.summary_record.add_value(\"scalar\", f\"train_loss_{self.rank_id}\", loss)\n        for i in range(len(res)):\n            self.summary_record.add_value(\n                \"scalar\", f\"val_{self.metric_name[i]}_{self.rank_id}\", Tensor(res[i], dtype=ms.float32)\n            )\n        self.summary_record.record(cur_step)\n\n    def on_train_end(self, run_context):\n        _logger.info(\"Finish training!\")\n        if self.dataset_val is not None:\n            _logger.info(\n                f\"The best validation {self.metric_name[0]} is: {self.best_res:.4%} at epoch {self.best_epoch}.\"\n            )\n        _logger.info(\"=\" * 80)\n\n    def _get_network_from_cbp(self, cb_params):\n        if self.dataset_sink_mode:\n            network = cb_params.train_network.network\n        else:\n            network = cb_params.train_network\n        return network\n\n    def _get_optimizer_from_cbp(self, cb_params):\n        if cb_params.optimizer is not None:\n            optimizer = cb_params.optimizer\n        elif self.dataset_sink_mode:\n            optimizer = cb_params.train_network.network.optimizer\n        else:\n            optimizer = cb_params.train_network.optimizer\n        return optimizer\n\n    def _get_lr_from_cbp(self, cb_params):\n        optimizer = self._get_optimizer_from_cbp(cb_params)\n        if optimizer.global_step < 
1:\n            _logger.warning(\n                \"`global_step` of optimizer is less than 1. It seems to be a overflow at the first step. \"\n                \"If you keep seeing this message, it means that the optimizer never actually called.\"\n            )\n            optim_step = Tensor((0,), ms.int32)\n        else:  # if the optimizer is successfully called, the global_step will actually be the value of next step.\n            optim_step = optimizer.global_step - 1\n        if optimizer.dynamic_lr:\n            lr = optimizer.learning_rate(optim_step)[0]\n        else:\n            lr = optimizer.learning_rate\n        return lr\n\n    def _get_loss_from_cbp(self, cb_params):\n\"\"\"\n        Get loss from the network output.\n        Args:\n            cb_params (_InternalCallbackParam): Callback parameters.\n        Returns:\n            Union[Tensor, None], if parse loss success, will return a Tensor value(shape is [1]), else return None.\n        \"\"\"\n        output = cb_params.net_outputs\n        if output is None:\n            _logger.warning(\"Can not find any output by this network, so SummaryCollector will not collect loss.\")\n            return None\n\n        if isinstance(output, (int, float, Tensor)):\n            loss = output\n        elif isinstance(output, (list, tuple)) and output:\n            # If the output is a list, since the default network returns loss first,\n            # we assume that the first one is loss.\n            loss = output[0]\n        else:\n            _logger.warning(\n                \"The output type could not be identified, expect type is one of \"\n                \"[int, float, Tensor, list, tuple], so no loss was recorded in SummaryCollector.\"\n            )\n            return None\n\n        if not isinstance(loss, Tensor):\n            loss = Tensor(loss)\n\n        loss = Tensor(np.mean(loss.asnumpy()))\n        return loss\n\n    def _flush_from_cache(self, cb_params):\n\"\"\"Flush cache data to host if tensor is cache enable.\"\"\"\n        has_cache_params = False\n        params = cb_params.train_network.get_parameters()\n        for param in params:\n            if param.cache_enable:\n                has_cache_params = True\n                Tensor(param).flush_from_cache()\n        if not has_cache_params:\n            self._need_flush_from_cache = False\n
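A usage sketch of attaching the callback to training; model, loader_train and loader_val are assumed to have been created beforehand (e.g., with create_trainer and create_loader):

from mindcv.utils.callbacks import StateMonitor

state_cb = StateMonitor(
    model,                    # mindspore.Model returned by create_trainer
    model_name="densenet121",
    dataset_val=loader_val,   # validation loader; None disables in-training eval
    metric_name=("accuracy",),
    val_interval=1,           # validate every epoch
    ckpt_save_dir="./ckpt",
    log_interval=100,
)
# The best checkpoint (highest validation accuracy) is saved as densenet121_best.ckpt.
model.train(10, loader_train, callbacks=[state_cb], dataset_sink_mode=True)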
"},{"location":"reference/utils/#mindcv.utils.callbacks.StateMonitor.apply_eval","title":"mindcv.utils.callbacks.StateMonitor.apply_eval(run_context)","text":"

Run model evaluation and return the validation accuracy.

Source code in mindcv/utils/callbacks.py
def apply_eval(self, run_context):\n\"\"\"Model evaluation, return validation accuracy.\"\"\"\n    if self.model_ema:\n        cb_params = run_context.original_args()\n        self.hyper_map(ops.assign, self.swap_params, self.online_params)\n        ema_dict = dict()\n        net = self._get_network_from_cbp(cb_params)\n        for param in net.get_parameters():\n            if param.name.startswith(\"ema\"):\n                new_name = param.name.split(\"ema.\")[1]\n                ema_dict[new_name] = param.data\n        load_param_into_net(self.model.train_network.network, ema_dict)\n        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)\n        self.hyper_map(ops.assign, self.online_params, self.swap_params)\n    else:\n        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)\n    res_array = ms.Tensor(list(res_dict.values()), ms.float32)\n    if self.device_num > 1:\n        res_array = self.all_reduce(res_array)\n        res_array /= self.device_num\n    res_array = res_array.asnumpy()\n    return res_array\n
"},{"location":"reference/utils/#mindcv.utils.callbacks.StateMonitor.on_train_epoch_end","title":"mindcv.utils.callbacks.StateMonitor.on_train_epoch_end(run_context)","text":"

After each epoch, print the training loss and validation accuracy, and save the best checkpoint file (the one with the highest validation accuracy).

Source code in mindcv/utils/callbacks.py
def on_train_epoch_end(self, run_context):\n\"\"\"\n    After epoch, print train loss and val accuracy,\n    save the best ckpt file with the highest validation accuracy.\n    \"\"\"\n    cb_params = run_context.original_args()\n    num_epochs = cb_params.epoch_num\n    num_batches = cb_params.batch_num\n    cur_step = cb_params.cur_step_num + self.last_epoch * num_batches\n    cur_epoch = cb_params.cur_epoch_num + self.last_epoch\n    cur_batch = (cur_step - 1) % num_batches + 1\n\n    train_time = time() - self.epoch_ts\n    loss = self._get_loss_from_cbp(cb_params)\n\n    val_time = 0\n    res = np.zeros(len(self.metric_name), dtype=np.float32)\n    # val while training if validation loader is not None\n    if (\n        self.dataset_val is not None\n        and cur_epoch >= self.val_start_epoch\n        and (cur_epoch - self.val_start_epoch) % self.val_interval == 0\n    ):\n        val_time = time()\n        res = self.apply_eval(run_context)\n        val_time = time() - val_time\n        # record val acc\n        metric_str = \"Validation \"\n        for i in range(len(self.metric_name)):\n            metric_str += f\"{self.metric_name[i]}: {res[i]:.4%}, \"\n        metric_str += f\"time: {val_time:.6f}s\"\n        _logger.info(metric_str)\n        # save the best ckpt file\n        if res[0] > self.best_res:\n            self.best_res = res[0]\n            self.best_epoch = cur_epoch\n            _logger.info(f\"=> New best val acc: {res[0]:.4%}\")\n\n    # save checkpoint\n    if self.rank_id in [0, None]:\n        if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc\n            best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f\"{self.model_name}_best.ckpt\")\n            save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)\n        if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):\n            if self._need_flush_from_cache:\n                self._flush_from_cache(cb_params)\n            # save optim for resume\n            optimizer = self._get_optimizer_from_cbp(cb_params)\n            optim_save_path = os.path.join(self.ckpt_save_dir, f\"optim_{self.model_name}.ckpt\")\n            save_checkpoint(optimizer, optim_save_path, async_save=True)\n            # keep checkpoint files number equal max number.\n            ckpt_save_path = os.path.join(self.ckpt_save_dir, f\"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt\")\n            _logger.info(f\"Saving model to {ckpt_save_path}\")\n            self.ckpt_manager.save_ckpoint(\n                cb_params.train_network,\n                num_ckpt=self.ckpt_keep_max,\n                metric=res[0],\n                save_path=ckpt_save_path,\n            )\n\n    # logging\n    total_time = time() - self.epoch_ts\n    _logger.info(\n        f\"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, \"\n        f\"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s\"\n    )\n    _logger.info(\"-\" * 80)\n    if self.rank_id in [0, None]:\n        log_line = \"\".join(\n            f\"{s:<20}\"\n            for s in [\n                f\"{cur_epoch}\",\n                f\"{loss.asnumpy():.6f}\",\n                *[f\"{i:.4%}\" for i in res],\n                f\"{train_time:.2f}\",\n                f\"{val_time:.2f}\",\n                f\"{total_time:.2f}\",\n            ]\n        )\n        with open(self.log_file, \"a\", encoding=\"utf-8\") as fp:\n            fp.write(log_line + \"\\n\")\n\n 
   # summary\n    self.summary_record.add_value(\"scalar\", f\"train_loss_{self.rank_id}\", loss)\n    for i in range(len(res)):\n        self.summary_record.add_value(\n            \"scalar\", f\"val_{self.metric_name[i]}_{self.rank_id}\", Tensor(res[i], dtype=ms.float32)\n        )\n    self.summary_record.record(cur_step)\n
"},{"location":"reference/utils/#mindcv.utils.callbacks.ValCallback","title":"mindcv.utils.callbacks.ValCallback","text":"

Bases: Callback

Source code in mindcv/utils/callbacks.py
class ValCallback(Callback):\n    def __init__(self, log_interval=100):\n        super().__init__()\n        self.log_interval = log_interval\n        self.ts = time()\n\n    def on_eval_step_end(self, run_context):\n        cb_params = run_context.original_args()\n        num_batches = cb_params.batch_num\n        cur_step = cb_params.cur_step_num\n\n        if cur_step % self.log_interval == 0 or cur_step == num_batches:\n            print(f\"batch: {cur_step}/{num_batches}, time: {time() - self.ts:.6f}s\")\n            self.ts = time()\n
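A usage sketch: the callback simply prints evaluation progress every log_interval batches; model and loader_val are assumed to exist already:

from mindcv.utils.callbacks import ValCallback

metrics = model.eval(loader_val, callbacks=[ValCallback(log_interval=50)],
                     dataset_sink_mode=False)
print(metrics)  # e.g. {'accuracy': ...}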
"},{"location":"reference/utils/#train-step","title":"Train Step","text":""},{"location":"reference/utils/#mindcv.utils.train_step.TrainStep","title":"mindcv.utils.train_step.TrainStep","text":"

Bases: nn.TrainOneStepWithLossScaleCell

Training step with loss scale.

The customized TrainOneStepCell also supports the following algorithms:
  • Exponential Moving Average (EMA)
  • Gradient Clipping
  • Gradient Accumulation
Source code in mindcv/utils/train_step.py
class TrainStep(nn.TrainOneStepWithLossScaleCell):\n\"\"\"Training step with loss scale.\n\n    The customized trainOneStepCell also supported following algorithms:\n        * Exponential Moving Average (EMA)\n        * Gradient Clipping\n        * Gradient Accumulation\n    \"\"\"\n\n    def __init__(\n        self,\n        network,\n        optimizer,\n        scale_sense=1.0,\n        ema=False,\n        ema_decay=0.9999,\n        clip_grad=False,\n        clip_value=15.0,\n        gradient_accumulation_steps=1,\n    ):\n        super(TrainStep, self).__init__(network, optimizer, scale_sense)\n        self.ema = ema\n        self.ema_decay = ema_decay\n        self.updates = Parameter(Tensor(0.0, ms.float32))\n        self.clip_grad = clip_grad\n        self.clip_value = clip_value\n        if self.ema:\n            self.weights_all = ms.ParameterTuple(list(network.get_parameters()))\n            self.ema_weight = self.weights_all.clone(\"ema\", init=\"same\")\n\n        self.accumulate_grad = gradient_accumulation_steps > 1\n        if self.accumulate_grad:\n            self.gradient_accumulation = GradientAccumulation(gradient_accumulation_steps, optimizer, self.grad_reducer)\n\n    def ema_update(self):\n        self.updates += 1\n        # ema factor is corrected by (1 - exp(-t/T)), where `t` means time and `T` means temperature.\n        ema_decay = self.ema_decay * (1 - F.exp(-self.updates / 2000))\n        # update trainable parameters\n        success = self.hyper_map(F.partial(_ema_op, ema_decay), self.ema_weight, self.weights_all)\n        return success\n\n    def construct(self, *inputs):\n        weights = self.weights\n        loss = self.network(*inputs)\n        scaling_sens = self.scale_sense\n\n        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)\n\n        scaling_sens_filled = ops.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))\n        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)\n        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)\n\n        # todo: When to clip grad? Do we need to clip grad after grad reduction? 
What if grad accumulation is needed?\n        if self.clip_grad:\n            grads = ops.clip_by_global_norm(grads, clip_norm=self.clip_value)\n\n        if self.loss_scaling_manager:  # scale_sense = update_cell: Cell --> TrainOneStepWithLossScaleCell.construct\n            if self.accumulate_grad:\n                # todo: GradientAccumulation only call grad_reducer at the step where the accumulation is completed.\n                #  So checking the overflow status is after gradient reduction, is this correct?\n                # get the overflow buffer\n                cond = self.get_overflow_status(status, grads)\n                overflow = self.process_loss_scale(cond)\n                # if there is no overflow, do optimize\n                if not overflow:\n                    loss = self.gradient_accumulation(loss, grads)\n            else:\n                # apply grad reducer on grads\n                grads = self.grad_reducer(grads)\n                # get the overflow buffer\n                cond = self.get_overflow_status(status, grads)\n                overflow = self.process_loss_scale(cond)\n                # if there is no overflow, do optimize\n                if not overflow:\n                    loss = F.depend(loss, self.optimizer(grads))\n        else:  # scale_sense = loss_scale: Tensor --> TrainOneStepCell.construct\n            if self.accumulate_grad:\n                loss = self.gradient_accumulation(loss, grads)\n            else:\n                grads = self.grad_reducer(grads)\n                loss = F.depend(loss, self.optimizer(grads))\n\n        if self.ema:\n            loss = F.depend(loss, self.ema_update())\n\n        return loss\n
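A sketch of wrapping a network-with-loss in the customized train step; in practice create_trainer (below) constructs TrainStep automatically whenever EMA, gradient clipping or gradient accumulation is requested. net_with_loss and optimizer are assumed to be built elsewhere:

import mindspore as ms
from mindspore import Tensor
from mindcv.utils.train_step import TrainStep

train_step = TrainStep(
    network=net_with_loss,                    # nn.Cell that returns the loss
    optimizer=optimizer,
    scale_sense=Tensor(1024.0, ms.float32),   # fixed loss scale
    ema=True, ema_decay=0.9999,               # keep an EMA copy of the weights
    clip_grad=True, clip_value=15.0,          # clip gradients by global norm
    gradient_accumulation_steps=1,
).set_train()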
"},{"location":"reference/utils/#trainer-factory","title":"Trainer Factory","text":""},{"location":"reference/utils/#mindcv.utils.trainer_factory.create_trainer","title":"mindcv.utils.trainer_factory.create_trainer(network, loss, optimizer, metrics, amp_level, amp_cast_list, loss_scale_type, loss_scale=1.0, drop_overflow_update=False, ema=False, ema_decay=0.9999, clip_grad=False, clip_value=15.0, gradient_accumulation_steps=1)","text":"

Create Trainer.

PARAMETER DESCRIPTION network

The backbone network to train, evaluate or predict.

TYPE: nn.Cell

loss

The function of calculating loss.

TYPE: nn.Cell

optimizer

The optimizer for training.

TYPE: nn.Cell

metrics

The metrics for model evaluation.

TYPE: Union[dict, set]

amp_level

The level of auto mixed precision training.

TYPE: str

amp_cast_list

Custom list of cell types to cast to FP16 at the cell level.

TYPE: str

loss_scale_type

The type of loss scale.

TYPE: str

loss_scale

The value of loss scale.

TYPE: float DEFAULT: 1.0

drop_overflow_update

Whether to skip the optimizer update when a gradient overflow occurs.

TYPE: bool DEFAULT: False

ema

Whether to use exponential moving average of model weights.

TYPE: bool DEFAULT: False

ema_decay

Decay factor for model weights moving average.

TYPE: float DEFAULT: 0.9999

clip_grad

Whether to clip gradients.

TYPE: bool DEFAULT: False

clip_value

The value at which to clip gradients.

TYPE: float DEFAULT: 15.0

gradient_accumulation_steps

Accumulate the gradients of n batches before update.

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION

mindspore.Model

Source code in mindcv/utils/trainer_factory.py
def create_trainer(\n    network: nn.Cell,\n    loss: nn.Cell,\n    optimizer: nn.Cell,\n    metrics: Union[dict, set],\n    amp_level: str,\n    amp_cast_list: str,\n    loss_scale_type: str,\n    loss_scale: float = 1.0,\n    drop_overflow_update: bool = False,\n    ema: bool = False,\n    ema_decay: float = 0.9999,\n    clip_grad: bool = False,\n    clip_value: float = 15.0,\n    gradient_accumulation_steps: int = 1,\n):\n\"\"\"Create Trainer.\n\n    Args:\n        network: The backbone network to train, evaluate or predict.\n        loss: The function of calculating loss.\n        optimizer: The optimizer for training.\n        metrics: The metrics for model evaluation.\n        amp_level: The level of auto mixing precision training.\n        amp_cast_list: At the cell level, custom casting the cell to FP16.\n        loss_scale_type: The type of loss scale.\n        loss_scale: The value of loss scale.\n        drop_overflow_update: Whether to execute optimizer if there is an overflow.\n        ema: Whether to use exponential moving average of model weights.\n        ema_decay: Decay factor for model weights moving average.\n        clip_grad: whether to gradient clip.\n        clip_value: The value at which to clip gradients.\n        gradient_accumulation_steps: Accumulate the gradients of n batches before update.\n\n    Returns:\n        mindspore.Model\n\n    \"\"\"\n    if loss_scale < 1.0:\n        raise ValueError(\"Loss scale cannot be less than 1.0!\")\n\n    if drop_overflow_update is False and loss_scale_type.lower() == \"dynamic\":\n        raise ValueError(\"DynamicLossScale ALWAYS drop overflow!\")\n\n    if gradient_accumulation_steps < 1:\n        raise ValueError(\"`gradient_accumulation_steps` must be >= 1!\")\n\n    if not require_customized_train_step(ema, clip_grad, gradient_accumulation_steps, amp_cast_list):\n        mindspore_kwargs = dict(\n            network=network,\n            loss_fn=loss,\n            optimizer=optimizer,\n            metrics=metrics,\n            amp_level=amp_level,\n        )\n        if loss_scale_type.lower() == \"fixed\":\n            mindspore_kwargs[\"loss_scale_manager\"] = FixedLossScaleManager(\n                loss_scale=loss_scale, drop_overflow_update=drop_overflow_update\n            )\n        elif loss_scale_type.lower() == \"dynamic\":\n            mindspore_kwargs[\"loss_scale_manager\"] = DynamicLossScaleManager(\n                init_loss_scale=loss_scale, scale_factor=2, scale_window=2000\n            )\n        elif loss_scale_type.lower() == \"auto\":\n            # We don't explicitly construct LossScaleManager\n            _logger.warning(\n                \"You are using AUTO loss scale, which means the LossScaleManager isn't explicitly pass in \"\n                \"when creating a mindspore.Model instance. \"\n                \"NOTE: mindspore.Model may use LossScaleManager silently. 
See mindspore.train.amp for details.\"\n            )\n        else:\n            raise ValueError(f\"Loss scale type only support ['fixed', 'dynamic', 'auto'], but got{loss_scale_type}.\")\n        model = Model(**mindspore_kwargs)\n    else:  # require customized train step\n        eval_network = nn.WithEvalCell(network, loss, amp_level in [\"O2\", \"O3\", \"auto\"])\n        auto_mixed_precision(network, amp_level, amp_cast_list)\n        net_with_loss = add_loss_network(network, loss, amp_level)\n        train_step_kwargs = dict(\n            network=net_with_loss,\n            optimizer=optimizer,\n            ema=ema,\n            ema_decay=ema_decay,\n            clip_grad=clip_grad,\n            clip_value=clip_value,\n            gradient_accumulation_steps=gradient_accumulation_steps,\n        )\n        if loss_scale_type.lower() == \"fixed\":\n            loss_scale_manager = FixedLossScaleManager(loss_scale=loss_scale, drop_overflow_update=drop_overflow_update)\n        elif loss_scale_type.lower() == \"dynamic\":\n            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=loss_scale, scale_factor=2, scale_window=2000)\n        else:\n            raise ValueError(f\"Loss scale type only support ['fixed', 'dynamic'], but got{loss_scale_type}.\")\n        update_cell = loss_scale_manager.get_update_cell()\n        # 1. loss_scale_type=\"fixed\", drop_overflow_update=False\n        # --> update_cell=None, TrainStep=TrainOneStepCell(scale_sense=loss_scale)\n        # 2. loss_scale_type: fixed, drop_overflow_update: True\n        # --> update_cell=FixedLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)\n        # 3. loss_scale_type: dynamic, drop_overflow_update: True\n        # --> update_cell=DynamicLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)\n        if update_cell is None:\n            train_step_kwargs[\"scale_sense\"] = Tensor(loss_scale, dtype=ms.float32)\n        else:\n            if not context.get_context(\"enable_ge\") and context.get_context(\"device_target\") == \"CPU\":\n                raise ValueError(\n                    \"Only `loss_scale_type` is `fixed` and `drop_overflow_update` is `False`\"\n                    \"are supported on device `CPU`.\"\n                )\n            train_step_kwargs[\"scale_sense\"] = update_cell\n        train_step_cell = TrainStep(**train_step_kwargs).set_train()\n        model = Model(train_step_cell, eval_network=eval_network, metrics=metrics, eval_indexes=[0, 1, 2])\n        # todo: do we need to set model._loss_scale_manager\n    return model\n
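A usage sketch under the assumption that network, loss, optimizer and loader_train were built with the factory functions shown elsewhere in this documentation:

from mindcv.utils.trainer_factory import create_trainer

trainer = create_trainer(
    network, loss, optimizer,
    metrics={"accuracy"},
    amp_level="O0",
    amp_cast_list=None,
    loss_scale_type="fixed",
    loss_scale=1024.0,
    drop_overflow_update=False,
)
trainer.train(10, loader_train, dataset_sink_mode=True)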
"},{"location":"tutorials/configuration/","title":"Configuration","text":"

MindCV can parse a model's yaml file with the argparse and PyYAML libraries to configure parameters. Let's use the squeezenet_1.0 model as an example to explain how to configure the corresponding parameters.

"},{"location":"tutorials/configuration/#basic-environment","title":"Basic Environment","text":"
  1. Parameter description
  • mode: Use graph mode (0) or pynative mode (1).

  • distribute: Whether to use distributed training.

  2. Sample yaml file

    mode: 0\ndistribute: True\n...\n
  3. Parse parameter setting

    python train.py --mode 0 --distribute False ...\n
  4. Corresponding code example

    args.mode corresponds to the parameter mode, and args.distribute corresponds to the parameter distribute.

    def train(args):\n    ms.set_context(mode=args.mode)\n\n    if args.distribute:\n        init()\n        device_num = get_group_size()\n        rank_id = get_rank()\n        ms.set_auto_parallel_context(device_num=device_num,\n                                     parallel_mode='data_parallel',\n                                     gradients_mean=True)\n    else:\n        device_num = None\n        rank_id = None\n    ...\n
"},{"location":"tutorials/configuration/#dataset","title":"Dataset","text":"
  1. Parameter description
  • dataset: dataset name.

  • data_dir: Path of dataset file.

  • shuffle: whether to shuffle the dataset.

  • dataset_download: whether to download the dataset.

  • batch_size: The number of rows in each batch.

  • drop_remainder: Determines whether to drop the last block whose data row number is less than the batch size.

  • num_parallel_workers: Number of workers (threads) used to process the dataset in parallel.

  2. Sample yaml file

    dataset: 'imagenet'\ndata_dir: './imagenet2012'\nshuffle: True\ndataset_download: False\nbatch_size: 32\ndrop_remainder: True\nnum_parallel_workers: 8\n...\n
  3. Parse parameter setting

    python train.py ... --dataset imagenet --data_dir ./imagenet2012 --shuffle True \\\n--dataset_download False --batch_size 32 --drop_remainder True \\\n--num_parallel_workers 8 ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    dataset_train = create_dataset(\n        name=args.dataset,\n        root=args.data_dir,\n        split='train',\n        shuffle=args.shuffle,\n        num_samples=args.num_samples,\n        num_shards=device_num,\n        shard_id=rank_id,\n        num_parallel_workers=args.num_parallel_workers,\n        download=args.dataset_download,\n        num_aug_repeats=args.aug_repeats)\n\n    ...\n    target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None\n\n    loader_train = create_loader(\n        dataset=dataset_train,\n        batch_size=args.batch_size,\n        drop_remainder=args.drop_remainder,\n        is_training=True,\n        mixup=args.mixup,\n        cutmix=args.cutmix,\n        cutmix_prob=args.cutmix_prob,\n        num_classes=args.num_classes,\n        transform=transform_list,\n        target_transform=target_transform,\n        num_parallel_workers=args.num_parallel_workers,\n    )\n    ...\n
"},{"location":"tutorials/configuration/#data-augmentation","title":"Data Augmentation","text":"
  1. Parameter description
  • image_resize: the image size after resizing, adapted to the network input.

  • scale: random resize scale.

  • ratio: random resize aspect ratio.

  • hflip: probability of applying horizontal flip augmentation during training.

  • interpolation: image interpolation mode for resize operator.

  • crop_pct: input image center crop percent.

  • color_jitter: color jitter factor.

  • re_prob: the probability of performing random erasing.

  2. Sample yaml file

    image_resize: 224\nscale: [0.08, 1.0]\nratio: [0.75, 1.333]\nhflip: 0.5\ninterpolation: 'bilinear'\ncrop_pct: 0.875\ncolor_jitter: [0.4, 0.4, 0.4]\nre_prob: 0.5\n...\n
  3. Parse parameter setting

    python train.py ... --image_resize 224 --scale [0.08, 1.0] --ratio [0.75, 1.333] \\\n--hflip 0.5 --interpolation \"bilinear\" --crop_pct 0.875 \\\n--color_jitter [0.4, 0.4, 0.4] --re_prob 0.5 ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    transform_list = create_transforms(\n        dataset_name=args.dataset,\n        is_training=True,\n        image_resize=args.image_resize,\n        scale=args.scale,\n        ratio=args.ratio,\n        hflip=args.hflip,\n        vflip=args.vflip,\n        color_jitter=args.color_jitter,\n        interpolation=args.interpolation,\n        auto_augment=args.auto_augment,\n        mean=args.mean,\n        std=args.std,\n        re_prob=args.re_prob,\n        re_scale=args.re_scale,\n        re_ratio=args.re_ratio,\n        re_value=args.re_value,\n        re_max_attempts=args.re_max_attempts\n    )\n    ...\n
"},{"location":"tutorials/configuration/#model","title":"Model","text":"
  1. Parameter description
  • model: model name.

  • num_classes: number of label classes.

  • pretrained: whether to load the pretrained model weights.

  • ckpt_path: initialize model from this checkpoint.

  • keep_checkpoint_max: max number of checkpoint files.

  • ckpt_save_dir: the directory in which to save checkpoints.

  • epoch_size: train epoch size.

  • dataset_sink_mode: whether to enable dataset sink mode.

  • amp_level: auto mixed precision level for saving memory and acceleration.

  2. Sample yaml file

    model: 'squeezenet1_0'\nnum_classes: 1000\npretrained: False\nckpt_path: './squeezenet1_0_gpu.ckpt'\nkeep_checkpoint_max: 10\nckpt_save_dir: './ckpt/'\nepoch_size: 200\ndataset_sink_mode: True\namp_level: 'O0'\n...\n
  3. Parse parameter setting

    python train.py ... --model squeezenet1_0 --num_classes 1000 --pretrained False \\\n--ckpt_path ./squeezenet1_0_gpu.ckpt --keep_checkpoint_max 10 \\\n--ckpt_save_dir ./ckpt/ --epoch_size 200 --dataset_sink_mode True \\\n--amp_level O0 ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    network = create_model(model_name=args.model,\n        num_classes=args.num_classes,\n        in_channels=args.in_channels,\n        drop_rate=args.drop_rate,\n        drop_path_rate=args.drop_path_rate,\n        pretrained=args.pretrained,\n        checkpoint_path=args.ckpt_path,\n        ema=args.ema\n    )\n    ...\n
"},{"location":"tutorials/configuration/#loss-function","title":"Loss Function","text":"
  1. Parameter description
  • loss: name of loss function, BCE (BinaryCrossEntropy) or CE (CrossEntropy).

  • label_smoothing: label smoothing factor.

  2. Sample yaml file

    loss: 'CE'\nlabel_smoothing: 0.1\n...\n
  3. Parse parameter setting

    python train.py ... --loss CE --label_smoothing 0.1 ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    loss = create_loss(name=args.loss,\n        reduction=args.reduction,\n        label_smoothing=args.label_smoothing,\n        aux_factor=args.aux_factor\n     )\n    ...\n
"},{"location":"tutorials/configuration/#learning-rate-scheduler","title":"Learning Rate Scheduler","text":"
  1. Parameter description
  • scheduler: name of scheduler.

  • min_lr: the minimum learning rate, if the scheduler supports one.

  • lr: learning rate.

  • warmup_epochs: warmup epochs.

  • decay_epochs: decay epochs.

  2. Sample yaml file

    scheduler: 'cosine_decay'\nmin_lr: 0.0\nlr: 0.01\nwarmup_epochs: 0\ndecay_epochs: 200\n...\n
  3. Parse parameter setting

    python train.py ... --scheduler cosine_decay --min_lr 0.0 --lr 0.01 \\\n--warmup_epochs 0 --decay_epochs 200 ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    lr_scheduler = create_scheduler(num_batches,\n        scheduler=args.scheduler,\n        lr=args.lr,\n        min_lr=args.min_lr,\n        warmup_epochs=args.warmup_epochs,\n        warmup_factor=args.warmup_factor,\n        decay_epochs=args.decay_epochs,\n        decay_rate=args.decay_rate,\n        milestones=args.multi_step_decay_milestones,\n        num_epochs=args.epoch_size,\n        lr_epoch_stair=args.lr_epoch_stair\n    )\n    ...\n
"},{"location":"tutorials/configuration/#optimizer","title":"Optimizer","text":"
  1. Parameter description
  • opt: name of optimizer.

  • filter_bias_and_bn: whether to exclude bias and BatchNorm parameters from weight decay.

  • momentum: float hyperparameter, the momentum for the moving average.

  • weight_decay: weight decay (L2 penalty).

  • loss_scale: gradient scaling factor.

  • use_nesterov: whether to enable Nesterov momentum.

  2. Sample yaml file

    opt: 'momentum'\nfilter_bias_and_bn: True\nmomentum: 0.9\nweight_decay: 0.00007\nloss_scale: 1024\nuse_nesterov: False\n...\n
  3. Parse parameter setting

    python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \\\n--loss_scale 1024 --use_nesterov False ...\n
  4. Corresponding code example

    def train(args):\n    ...\n    if args.ema:\n        optimizer = create_optimizer(network.trainable_params(),\n            opt=args.opt,\n            lr=lr_scheduler,\n            weight_decay=args.weight_decay,\n            momentum=args.momentum,\n            nesterov=args.use_nesterov,\n            filter_bias_and_bn=args.filter_bias_and_bn,\n            loss_scale=args.loss_scale,\n            checkpoint_path=opt_ckpt_path,\n            eps=args.eps\n        )\n    else:\n        optimizer = create_optimizer(network.trainable_params(),\n            opt=args.opt,\n            lr=lr_scheduler,\n            weight_decay=args.weight_decay,\n            momentum=args.momentum,\n            nesterov=args.use_nesterov,\n            filter_bias_and_bn=args.filter_bias_and_bn,\n            checkpoint_path=opt_ckpt_path,\n            eps=args.eps\n        )\n    ...\n
"},{"location":"tutorials/configuration/#combination-of-yaml-and-parse","title":"Combination of Yaml and Parse","text":"

You can override the parameter settings in the yaml file by passing command-line arguments. Take the following shell command as an example:

python train.py -c ./configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir ./data\n

The above command overrides the value of the args.data_dir parameter, changing it from ./imagenet2012 (set in the yaml file) to ./data.
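The pattern behind this behavior can be sketched as follows (a simplified illustration, not the exact MindCV implementation): values loaded from the yaml file become argparse defaults, so any flag given explicitly on the command line wins.

import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", default="")
parser.add_argument("--data_dir", default="./")
parser.add_argument("--batch_size", type=int, default=32)

args, _ = parser.parse_known_args()
if args.config:
    with open(args.config) as f:
        parser.set_defaults(**yaml.safe_load(f))  # yaml supplies the defaults
args = parser.parse_args()  # explicit CLI flags override the yaml defaults
print(args.data_dir)        # "./data" when run with --data_dir ./data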

"},{"location":"tutorials/deployment/","title":"Inference Service Deployment","text":"

MindSpore Serving is a lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training on MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model.

This tutorial uses the mobilenet_v2_100 network as an example to describe how to deploy an inference service based on MindSpore Serving.

"},{"location":"tutorials/deployment/#environment-preparation","title":"Environment Preparation","text":"

Before deploying, ensure that MindSpore Serving has been properly installed and the environment variables are configured. To install and configure MindSpore Serving on your PC, go to the MindSpore Serving installation page.

"},{"location":"tutorials/deployment/#exporting-the-model","title":"Exporting the Model","text":"

To implement inference across platforms or hardware (e.g., Ascend AI processors, MindSpore device side, GPU, etc.), a model file in MindIR format should be generated from the network definition and a checkpoint. In MindSpore, the function for exporting the network model is export, and its main parameters are as follows:

  • net: MindSpore network structure.
  • inputs: Network input, the supported input type is Tensor. If multiple values are input, the values should be input at the same time, for example, ms.export(network, ms.Tensor(input1), ms.Tensor(input2), file_name='network', file_format='MINDIR').
  • file_name: Name of the exported model file. If file_name doesn't contain the corresponding suffix (for example, .mindir), the system will automatically add one after file_format is set.
  • file_format: MindSpore currently supports 'AIR', 'ONNX' and 'MINDIR' formats for exported models.

The following code uses mobilenet_v2_100 as an example to export the pretrained network model of MindCV and obtain the model file in MindIR format.

from mindcv.models import create_model\nimport numpy as np\nimport mindspore as ms\n\nmodel = create_model(model_name='mobilenet_v2_100', num_classes=1000, pretrained=True)\n\ninput_np = np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]).astype(np.float32)\n\n# Export mobilenet_v2_100.mindir to the current folder.\nms.export(model, ms.Tensor(input_np), file_name='mobilenet_v2_100', file_format='MINDIR')\n
"},{"location":"tutorials/deployment/#deploying-the-serving-inference-service","title":"Deploying the Serving Inference Service","text":""},{"location":"tutorials/deployment/#configuring-the-service","title":"Configuring the Service","text":"

Start Serving with the following files:

demo\n\u251c\u2500\u2500 mobilenet_v2_100\n\u2502   \u251c\u2500\u2500 1\n\u2502   \u2502   \u2514\u2500\u2500 mobilenet_v2_100.mindir\n\u2502   \u2514\u2500\u2500 servable_config.py\n\u2502\u2500\u2500 serving_server.py\n\u251c\u2500\u2500 serving_client.py\n\u251c\u2500\u2500 imagenet1000_clsidx_to_labels.txt\n\u2514\u2500\u2500 test_image\n    \u251c\u2500 dog\n    \u2502   \u251c\u2500 dog.jpg\n    \u2502   \u2514\u2500 \u2026\u2026\n    \u2514\u2500 \u2026\u2026\n
  • mobilenet_v2_100: Model folder. The folder name is the model name.
  • mobilenet_v2_100.mindir: Model file generated by the network in the previous step, which is stored in folder 1 (the number indicates the version number). Different versions are stored in different folders. The version number must be a string of digits. By default, the latest model file is started.
  • servable_config.py: Model configuration script. Declare the model and specify the input and output parameters of the model.
  • serving_server.py: Script to start the Serving server.
  • serving_client.py: Script to start the Python client.
  • imagenet1000_clsidx_to_labels.txt: Index of 1000 labels for the ImageNet dataset, available at examples/data/.
  • test_image: Test images, available at README.

Content of the configuration file servable_config.py:

from mindspore_serving.server import register\n\n# Declare the model. The parameter model_file indicates the name of the model file and model_format indicates the model type.\nmodel = register.declare_model(model_file=\"mobilenet_v2_100.mindir\", model_format=\"MindIR\")\n\n# The input parameters of the Servable method are specified by the input parameters of the Python method. The output parameters of the Servable method are specified by the output_names of register_method.\n@register.register_method(output_names=[\"score\"])\ndef predict(image):\n    x = register.add_stage(model, image, outputs_count=1)\n    return x\n
"},{"location":"tutorials/deployment/#starting-the-service","title":"Starting the Service","text":"

The server of MindSpore Serving can provide the deployment service through either gRPC or RESTful APIs. The following uses gRPC as an example. The service startup script serving_server.py deploys the mobilenet_v2_100 model in the local directory to device 0 and starts a gRPC server listening on 127.0.0.1:5500. Content of the script:

import os\nimport sys\nfrom mindspore_serving import server\n\ndef start():\n    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))\n\n    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name=\"mobilenet_v2_100\",\n                                                 device_ids=0)\n    server.start_servables(servable_configs=servable_config)\n    server.start_grpc_server(address=\"127.0.0.1:5500\")\n\nif __name__ == \"__main__\":\n    start()\n

If the following log information is displayed on the server, the gRPC service is started successfully.

Serving gRPC server start success, listening on 127.0.0.1:5500\n
"},{"location":"tutorials/deployment/#inference-execution","title":"Inference Execution","text":"

Start the Python client by using serving_client.py. The client script uses the create_transforms, create_dataset and create_loader functions of mindcv.data to preprocess the image and send it to the Serving server, then postprocesses the result returned by the server and prints the predicted label of the image.

import os\nfrom mindspore_serving.client import Client\nimport numpy as np\nfrom mindcv.data import create_transforms, create_dataset, create_loader\n\nnum_workers = 1\n\n# Dataset directory path\ndata_dir = \"./test_image/\"\n\ndataset = create_dataset(root=data_dir, split='', num_parallel_workers=num_workers)\ntransforms_list = create_transforms(dataset_name='ImageNet', is_training=False)\ndata_loader = create_loader(\n    dataset=dataset,\n    batch_size=1,\n    is_training=False,\n    num_classes=1000,\n    transform=transforms_list,\n    num_parallel_workers=num_workers\n)\nwith open(\"imagenet1000_clsidx_to_labels.txt\") as f:\n    idx2label = eval(f.read())\n\ndef postprocess(score):\n    max_idx = np.argmax(score)\n    return idx2label[max_idx]\n\ndef predict():\n    client = Client(\"127.0.0.1:5500\", \"mobilenet_v2_100\", \"predict\")\n    instances = []\n    images, _ = next(data_loader.create_tuple_iterator())\n    image_np = images.asnumpy().squeeze()\n    instances.append({\"image\": image_np})\n    result = client.infer(instances)\n\n    for instance in result:\n        label = postprocess(instance[\"score\"])\n        print(label)\n\nif __name__ == '__main__':\n    predict()\n

If the following information is displayed, the Serving service has correctly executed inference with the mobilenet_v2_100 model:

Labrador retriever\n

"},{"location":"tutorials/finetune/","title":"Model Fine-Tuning Training","text":"

In this tutorial, you will learn how to use MindCV for transfer learning to solve image classification problems on custom datasets. In deep learning tasks, we often encounter the problem of insufficient training data; in that case it is difficult to train the entire network directly to the desired accuracy. A better approach is to start from a model pretrained on a large dataset (close to the task data), and then either use it to initialize the network's weight parameters or apply it to the specific task as a fixed feature extractor.

This tutorial will use the DenseNet model pretrained on ImageNet as an example to introduce two different fine-tuning strategies to solve the image classification problem of wolves and dogs in the case of small samples:

  1. Overall model fine-tuning.
  2. Freeze backbone and only fine-tune the classifier.

For details of transfer learning, see Stanford University's CS231n.

"},{"location":"tutorials/finetune/#data-preparation","title":"Data Preparation","text":""},{"location":"tutorials/finetune/#download-dataset","title":"Download Dataset","text":"

Download the dog and wolf classification dataset used in this case. Each category has 120 training images and 30 validation images. Use the mindcv.utils.download interface to download the dataset and automatically extract it to the current directory.

import os\nfrom mindcv.utils.download import DownLoad\n\ndataset_url = \"https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip\"\nroot_dir = \"./\"\n\nif not os.path.exists(os.path.join(root_dir, 'data/Canidae')):\n    DownLoad().download_and_extract_archive(dataset_url, root_dir)\n

The directory structure of the dataset is as follows:

data/\n\u2514\u2500\u2500 Canidae\n    \u251c\u2500\u2500 train\n    \u2502   \u251c\u2500\u2500 dogs\n    \u2502   \u2514\u2500\u2500 wolves\n    \u2514\u2500\u2500 val\n        \u251c\u2500\u2500 dogs\n        \u2514\u2500\u2500 wolves\n
"},{"location":"tutorials/finetune/#dataset-loading-and-processing","title":"Dataset Loading and Processing","text":""},{"location":"tutorials/finetune/#loading-custom-datasets","title":"Loading Custom Datasets","text":"

By calling the create_dataset function in mindcv.data, we can easily load preset and customized datasets.

  • When the parameter name is set to null, a user-defined dataset is loaded. (Default)
  • When the parameter name is set to MNIST, CIFAR10 or another standard dataset name, the corresponding preset dataset is loaded.

At the same time, we need to set the dataset path data_dir and the data split name split (such as train or val) to load the corresponding training or validation set.

from mindcv.data import create_dataset, create_transforms, create_loader\n\nnum_workers = 8\n\n# path of dataset\ndata_dir = \"./data/Canidae/\"\n\n# load dataset\ndataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)\ndataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)\n

Note: The directory structure of the custom dataset should be the same as ImageNet, that is, the hierarchy root -> split -> class -> image.

DATASET_NAME\n    \u251c\u2500\u2500 split1(e.g. train)/\n    \u2502  \u251c\u2500\u2500 class1/\n    \u2502  \u2502   \u251c\u2500\u2500 000001.jpg\n    \u2502  \u2502   \u251c\u2500\u2500 000002.jpg\n    \u2502  \u2502   \u2514\u2500\u2500 ....\n    \u2502  \u2514\u2500\u2500 class2/\n    \u2502      \u251c\u2500\u2500 000001.jpg\n    \u2502      \u251c\u2500\u2500 000002.jpg\n    \u2502      \u2514\u2500\u2500 ....\n    \u2514\u2500\u2500 split2/\n       \u251c\u2500\u2500 class1/\n       \u2502   \u251c\u2500\u2500 000001.jpg\n       \u2502   \u251c\u2500\u2500 000002.jpg\n       \u2502   \u2514\u2500\u2500 ....\n       \u2514\u2500\u2500 class2/\n           \u251c\u2500\u2500 000001.jpg\n           \u251c\u2500\u2500 000002.jpg\n           \u2514\u2500\u2500 ....\n
"},{"location":"tutorials/finetune/#data-processing-and-augmentation","title":"Data Processing and Augmentation","text":"

First, we call the create_transforms function to obtain the preset data processing and augmentation strategy (transform list). In this task, because the file structure of the wolf-dog dataset is consistent with that of the ImageNet dataset, we specify the parameter dataset_name as ImageNet and directly use the preset ImageNet data processing and image augmentation strategy. create_transforms also supports a variety of customized processing and augmentation operations as well as automatic augmentation policies (AutoAugment); see the API description for details, and a brief sketch is given below.
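
As a brief, hedged illustration of such customization: image_resize is a parameter used elsewhere in these docs, while the auto_augment keyword and its value are assumptions that should be checked against the create_transforms API description.

# Hedged customization sketch: image_resize is taken from the quick-start example;\n# auto_augment is an assumed keyword for the AutoAugment policy -- check the API description\ntrans_train_custom = create_transforms(\n    dataset_name='ImageNet',\n    is_training=True,\n    image_resize=224,\n    auto_augment='autoaug',  # assumed value selecting the AutoAugment policy\n)\n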

We then pass the obtained transform list to create_loader(), specify batch_size and other parameters to complete the preparation of the training and validation data, and obtain a Dataset object that serves as the input of the model.

# Define and acquire data processing and augment operations\ntrans_train = create_transforms(dataset_name='ImageNet', is_training=True)\ntrans_val = create_transforms(dataset_name='ImageNet',is_training=False)\n\nloader_train = create_loader(\n    dataset=dataset_train,\n    batch_size=16,\n    is_training=True,\n    num_classes=2,\n    transform=trans_train,\n    num_parallel_workers=num_workers,\n)\nloader_val = create_loader(\n    dataset=dataset_val,\n    batch_size=5,\n    is_training=True,\n    num_classes=2,\n    transform=trans_val,\n    num_parallel_workers=num_workers,\n)\n
"},{"location":"tutorials/finetune/#dataset-visualization","title":"Dataset Visualization","text":"

For the Dataset object returned by the create_loader interface, we can create a data iterator through the create_tuple_iterator interface and use next() to read one batch of data.

images, labels = next(loader_train.create_tuple_iterator())\nprint(\"Tensor of image\", images.shape)\nprint(\"Labels:\", labels)\n
Tensor of image (16, 3, 224, 224)\nLabels: [0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1]\n

Visualize the acquired images and labels, using the class name corresponding to each image as its title.

import matplotlib.pyplot as plt\nimport numpy as np\n\n# class_name corresponds to label, and labels are marked in the order of folder string from small to large\nclass_name = {0: \"dogs\", 1: \"wolves\"}\n\nplt.figure(figsize=(15, 7))\nfor i in range(len(labels)):\n    # Get the image and its corresponding label\n    data_image = images[i].asnumpy()\n    data_label = labels[i]\n    # Process images for display\n    data_image = np.transpose(data_image, (1, 2, 0))\n    mean = np.array([0.485, 0.456, 0.406])\n    std = np.array([0.229, 0.224, 0.225])\n    data_image = std * data_image + mean\n    data_image = np.clip(data_image, 0, 1)\n    # Show Image\n    plt.subplot(3, 6, i + 1)\n    plt.imshow(data_image)\n    plt.title(class_name[int(labels[i].asnumpy())])\n    plt.axis(\"off\")\n\nplt.show()\n

"},{"location":"tutorials/finetune/#model-fine-tuning","title":"Model Fine-Tuning","text":""},{"location":"tutorials/finetune/#1-overall-model-fine-tuning","title":"1. Overall Model Fine-Tuning","text":""},{"location":"tutorials/finetune/#pretraining-model-loading","title":"Pretraining Model Loading","text":"

We use mindcv.models.densenet to define the DenseNet121 network. When the pretrained parameter in the interface is set to True, the network weights are downloaded automatically. Since the pretrained model classifies the 1000 categories of the ImageNet dataset, we set num_classes=2 so that the output of DenseNet's classifier (the last FC layer) is adjusted to two classes. In this case only the pretrained weights of the backbone are loaded, while the classifier keeps its initial values.

from mindcv.models import create_model\n\nnetwork = create_model(model_name='densenet121', num_classes=2, pretrained=True)\n

For the specific structure of DenseNet, see the DenseNet paper.

"},{"location":"tutorials/finetune/#model-training","title":"Model Training","text":"

Use the loaded and processed labeled wolf and dog images to fine-tune the DenseNet network. Note that a smaller learning rate should be used when fine-tuning the whole model.

from mindcv.loss import create_loss\nfrom mindcv.optim import create_optimizer\nfrom mindcv.scheduler import create_scheduler\nfrom mindspore import Model, LossMonitor, TimeMonitor\n\n# Define optimizer and loss function\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-4)\nloss = create_loss(name='CE')\n\n# Instantiated model\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\nmodel.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)\n
epoch: 1 step: 5, loss is 0.5195528864860535\nepoch: 1 step: 10, loss is 0.2654373049736023\nepoch: 1 step: 15, loss is 0.28758567571640015\nTrain epoch time: 17270.144 ms, per step time: 1151.343 ms\nepoch: 2 step: 5, loss is 0.1807008981704712\nepoch: 2 step: 10, loss is 0.1700802594423294\nepoch: 2 step: 15, loss is 0.09752683341503143\nTrain epoch time: 1372.549 ms, per step time: 91.503 ms\nepoch: 3 step: 5, loss is 0.13594701886177063\nepoch: 3 step: 10, loss is 0.03628234937787056\nepoch: 3 step: 15, loss is 0.039737217128276825\nTrain epoch time: 1453.237 ms, per step time: 96.882 ms\nepoch: 4 step: 5, loss is 0.014213413000106812\nepoch: 4 step: 10, loss is 0.030747078359127045\nepoch: 4 step: 15, loss is 0.0798817127943039\nTrain epoch time: 1331.237 ms, per step time: 88.749 ms\nepoch: 5 step: 5, loss is 0.009510636329650879\nepoch: 5 step: 10, loss is 0.02603740245103836\nepoch: 5 step: 15, loss is 0.051846928894519806\nTrain epoch time: 1312.737 ms, per step time: 87.516 ms\nepoch: 6 step: 5, loss is 0.1163717582821846\nepoch: 6 step: 10, loss is 0.02439398318529129\nepoch: 6 step: 15, loss is 0.02564268559217453\nTrain epoch time: 1434.704 ms, per step time: 95.647 ms\nepoch: 7 step: 5, loss is 0.013310655951499939\nepoch: 7 step: 10, loss is 0.02289542555809021\nepoch: 7 step: 15, loss is 0.1992517113685608\nTrain epoch time: 1275.935 ms, per step time: 85.062 ms\nepoch: 8 step: 5, loss is 0.015928998589515686\nepoch: 8 step: 10, loss is 0.011409260332584381\nepoch: 8 step: 15, loss is 0.008141174912452698\nTrain epoch time: 1323.102 ms, per step time: 88.207 ms\nepoch: 9 step: 5, loss is 0.10395607352256775\nepoch: 9 step: 10, loss is 0.23055407404899597\nepoch: 9 step: 15, loss is 0.04896317049860954\nTrain epoch time: 1261.067 ms, per step time: 84.071 ms\nepoch: 10 step: 5, loss is 0.03162381425499916\nepoch: 10 step: 10, loss is 0.13094250857830048\nepoch: 10 step: 15, loss is 0.020028553903102875\nTrain epoch time: 1217.958 ms, per step time: 81.197 ms\n
"},{"location":"tutorials/finetune/#model-evaluation","title":"Model Evaluation","text":"

After training, we evaluate the accuracy of the model on the validation set.

res = model.eval(loader_val)\nprint(res)\n
{'accuracy': 1.0}\n
"},{"location":"tutorials/finetune/#visual-model-inference-results","title":"Visual Model Inference Results","text":"

Define the visualize_model function and visualize the model predictions.

import matplotlib.pyplot as plt\nimport mindspore as ms\n\ndef visualize_model(model, val_dl, num_classes=2):\n    # Load the data of the validation set for validation\n    images, labels= next(val_dl.create_tuple_iterator())\n    # Predict image class\n    output = model.predict(images)\n    pred = np.argmax(output.asnumpy(), axis=1)\n    # Display images and their predicted values\n    images = images.asnumpy()\n    labels = labels.asnumpy()\n    class_name = {0: \"dogs\", 1: \"wolves\"}\n    plt.figure(figsize=(15, 7))\n    for i in range(len(labels)):\n        plt.subplot(3, 6, i + 1)\n        # If the prediction is correct, it is displayed in blue; If the prediction is wrong, it is displayed in red\n        color = 'blue' if pred[i] == labels[i] else 'red'\n        plt.title('predict:{}'.format(class_name[pred[i]]), color=color)\n        picture_show = np.transpose(images[i], (1, 2, 0))\n        mean = np.array([0.485, 0.456, 0.406])\n        std = np.array([0.229, 0.224, 0.225])\n        picture_show = std * picture_show + mean\n        picture_show = np.clip(picture_show, 0, 1)\n        plt.imshow(picture_show)\n        plt.axis('off')\n\n    plt.show()\n

Use the fine-tuned model to predict the wolf and dog images of the validation set. A title shown in blue indicates a correct prediction; a title shown in red indicates a wrong prediction.

visualize_model(model, loader_val)\n

"},{"location":"tutorials/finetune/#2-freeze-backbone-and-fine-tune-the-classifier","title":"2. Freeze Backbone and Fine-Tune the Classifier","text":""},{"location":"tutorials/finetune/#freezing-backbone-parameters","title":"Freezing Backbone Parameters","text":"

First, we need to freeze all network layers except the final classifier, that is, set the requires_grad attribute of the corresponding parameters to False so that gradients are not computed and the parameters are not updated during backpropagation.

Because all models in mindcv.models use classifier to name the classification head of the model (i.e., the final Dense layer), we can filter out every parameter other than classifier.weight and classifier.bias and set its requires_grad attribute to False.

# freeze backbone\nfor param in network.get_parameters():\n    if param.name not in [\"classifier.weight\", \"classifier.bias\"]:\n        param.requires_grad = False\n
"},{"location":"tutorials/finetune/#fine-tune-classifier","title":"Fine-Tune Classifier","text":"

Because the feature network is now fixed, we do not have to worry about distorting the pretrained features during training. Therefore, compared with the first method, we can increase the learning rate.

Compared with fine-tuning the whole model, this saves more than half of the training time per epoch, because most of the gradients no longer need to be computed.

# dataset load\ndataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)\nloader_train = create_loader(\n    dataset=dataset_train,\n    batch_size=16,\n    is_training=True,\n    num_classes=2,\n    transform=trans_train,\n    num_parallel_workers=num_workers,\n)\n\n# Define optimizer and loss function\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-3)\nloss = create_loss(name='CE')\n\n# Instantiated model\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\n\nmodel.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)\n
epoch: 1 step: 5, loss is 0.051333948969841\nepoch: 1 step: 10, loss is 0.02043312042951584\nepoch: 1 step: 15, loss is 0.16161368787288666\nTrain epoch time: 10228.601 ms, per step time: 681.907 ms\nepoch: 2 step: 5, loss is 0.002121545374393463\nepoch: 2 step: 10, loss is 0.0009798109531402588\nepoch: 2 step: 15, loss is 0.015776708722114563\nTrain epoch time: 562.543 ms, per step time: 37.503 ms\nepoch: 3 step: 5, loss is 0.008056879043579102\nepoch: 3 step: 10, loss is 0.0009347647428512573\nepoch: 3 step: 15, loss is 0.028648357838392258\nTrain epoch time: 523.249 ms, per step time: 34.883 ms\nepoch: 4 step: 5, loss is 0.001014217734336853\nepoch: 4 step: 10, loss is 0.0003159046173095703\nepoch: 4 step: 15, loss is 0.0007699579000473022\nTrain epoch time: 508.886 ms, per step time: 33.926 ms\nepoch: 5 step: 5, loss is 0.0015687644481658936\nepoch: 5 step: 10, loss is 0.012090332806110382\nepoch: 5 step: 15, loss is 0.004598274827003479\nTrain epoch time: 507.243 ms, per step time: 33.816 ms\nepoch: 6 step: 5, loss is 0.010022152215242386\nepoch: 6 step: 10, loss is 0.0066385045647621155\nepoch: 6 step: 15, loss is 0.0036080628633499146\nTrain epoch time: 517.646 ms, per step time: 34.510 ms\nepoch: 7 step: 5, loss is 0.01344013586640358\nepoch: 7 step: 10, loss is 0.0008538365364074707\nepoch: 7 step: 15, loss is 0.14135593175888062\nTrain epoch time: 511.513 ms, per step time: 34.101 ms\nepoch: 8 step: 5, loss is 0.01626245677471161\nepoch: 8 step: 10, loss is 0.02871556021273136\nepoch: 8 step: 15, loss is 0.010110966861248016\nTrain epoch time: 545.678 ms, per step time: 36.379 ms\nepoch: 9 step: 5, loss is 0.008498094975948334\nepoch: 9 step: 10, loss is 0.2588501274585724\nepoch: 9 step: 15, loss is 0.0014278888702392578\nTrain epoch time: 499.243 ms, per step time: 33.283 ms\nepoch: 10 step: 5, loss is 0.021337147802114487\nepoch: 10 step: 10, loss is 0.00829876959323883\nepoch: 10 step: 15, loss is 0.008352771401405334\nTrain epoch time: 465.600 ms, per step time: 31.040 ms\n
"},{"location":"tutorials/finetune/#model-evaluation_1","title":"Model Evaluation","text":"

After training, we evaluate the accuracy of the model on the validation set.

dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)\nloader_val = create_loader(\n    dataset=dataset_val,\n    batch_size=5,\n    is_training=True,\n    num_classes=2,\n    transform=trans_val,\n    num_parallel_workers=num_workers,\n)\n\nres = model.eval(loader_val)\nprint(res)\n
{'accuracy': 1.0}\n
"},{"location":"tutorials/finetune/#visual-model-prediction","title":"Visual Model Prediction","text":"

Use the fine-tuned model to predict the wolf and dog images of the validation set. A title shown in blue indicates a correct prediction; a title shown in red indicates a wrong prediction.

visualize_model(model, loader_val)\n

The wolf/dog predictions after fine-tuning are all correct.

"},{"location":"tutorials/inference/","title":"Image Classification Prediction","text":"

This tutorial introduces how to call a pretrained model in MindCV to make a classification prediction on a test image.

"},{"location":"tutorials/inference/#model-loading","title":"Model Loading","text":""},{"location":"tutorials/inference/#view-all-available-models","title":"View All Available Models","text":"

By calling the registry.list_models function in mindcv.models, the names of all available network models can be printed. Variants of a network with different parameter configurations are also listed, such as resnet18 / resnet34 / resnet50 / resnet101 / resnet152. A filtering example is shown after the listing below.

import sys\nsys.path.append(\"..\")\nfrom mindcv.models import registry\nregistry.list_models()\n
['BiT_resnet50',\n 'repmlp_b224',\n 'repmlp_b256',\n 'repmlp_d256',\n 'repmlp_l256',\n 'repmlp_t224',\n 'repmlp_t256',\n 'convit_base',\n 'convit_base_plus',\n 'convit_small',\n ...\n 'visformer_small',\n 'visformer_small_v2',\n 'visformer_tiny',\n 'visformer_tiny_v2',\n 'vit_b_16_224',\n 'vit_b_16_384',\n 'vit_b_32_224',\n 'vit_b_32_384',\n 'vit_l_16_224',\n 'vit_l_16_384',\n 'vit_l_32_224',\n 'xception']\n
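
As a hedged sketch, the listing can also be narrowed with a wildcard filter and a pretrained flag, mirroring the quick-start usage list_models("swin*", pretrained=True); the resnet* pattern below is only illustrative.

# Hedged sketch: filter the registry listing by name pattern and pretrained availability\nregistry.list_models(\"resnet*\", pretrained=True)\n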
"},{"location":"tutorials/inference/#load-pretraining-model","title":"Load Pretraining Model","text":"

Taking the resnet50 model as an example, we introduce two ways of loading the model checkpoint using the create_model function in mindcv.models.

1). When the pretrained parameter in the interface is set to True, the network weights are downloaded automatically.

from mindcv.models import create_model\nmodel = create_model(model_name='resnet50', num_classes=1000, pretrained=True)\n# Switch the execution logic of the network to the inference scenario\nmodel.set_train(False)\n
102453248B [00:16, 6092186.31B/s]\n\nResNet<\n  (conv1): Conv2d<input_channels=3, output_channels=64, kernel_size=(7, 7), stride=(2, 2), pad_mode=pad, padding=3, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>\n  (bn1): BatchNorm2d<num_features=64, eps=1e-05, momentum=0.9, gamma=Parameter (name=bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True), beta=Parameter (name=bn1.beta, shape=(64,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=bn1.moving_mean, shape=(64,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=bn1.moving_variance, shape=(64,), dtype=Float32, requires_grad=False)>\n  (relu): ReLU<>\n  (max_pool): MaxPool2d<kernel_size=3, stride=2, pad_mode=SAME>\n  ...\n  (pool): GlobalAvgPooling<>\n  (classifier): Dense<input_channels=2048, output_channels=1000, has_bias=True>\n  >\n

2). When the checkpoint_path parameter in the interface is set to a file path, the model parameter file with the .ckpt extension can be loaded from that path.

from mindcv.models import create_model\nmodel = create_model(model_name='resnet50', num_classes=1000, checkpoint_path='./resnet50_224.ckpt')\n# Switch the execution logic of the network to the inference scenario\nmodel.set_train(False)\n
"},{"location":"tutorials/inference/#data-preparation","title":"Data Preparation","text":""},{"location":"tutorials/inference/#create-dataset","title":"Create Dataset","text":"

Here, we download a Wikipedia image as the test image and use the create_dataset function in mindcv.data to construct a custom dataset for this single image.

from mindcv.data import create_dataset\nnum_workers = 1\n# path of dataset\ndata_dir = \"./data/\"\ndataset = create_dataset(root=data_dir, split='test', num_parallel_workers=num_workers)\n# Image visualization\nfrom PIL import Image\nImage.open(\"./data/test/dog/dog.jpg\")\n

"},{"location":"tutorials/inference/#data-preprocessing","title":"Data Preprocessing","text":"

Call the create_transforms function to obtain the data processing strategy (transform list) of the ImageNet dataset used by the pre-trained model.

We pass the obtained transform list into the create_loader function, specify batch_size=1 and other parameters, and complete the preparation of the test data; the returned Dataset object serves as the input of the model.

from mindcv.data import create_transforms, create_loader\ntransforms_list = create_transforms(dataset_name='imagenet', is_training=False)\ndata_loader = create_loader(\n    dataset=dataset,\n    batch_size=1,\n    is_training=False,\n    num_classes=1000,\n    transform=transforms_list,\n    num_parallel_workers=num_workers\n)\n
"},{"location":"tutorials/inference/#model-inference","title":"Model Inference","text":"

The image from the user-defined dataset is passed to the model to obtain the inference result. Here, the Squeeze operator from mindspore.ops is used to remove the batch dimension.

import mindspore.ops as P\nimport numpy as np\nimages, _ = next(data_loader.create_tuple_iterator())\noutput = P.Squeeze()(model(images))\npred = np.argmax(output.asnumpy())\n
with open(\"imagenet1000_clsidx_to_labels.txt\") as f:\n    idx2label = eval(f.read())\nprint('predict: {}'.format(idx2label[pred]))\n
predict: Labrador retriever\n
"},{"location":"tutorials/quick_start/","title":"Quick Start","text":"

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pretrained weights. SoTA methods such as AutoAugment are also provided for performance improvement. With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks. In this tutorial, we will provide a quick start guideline for MindCV.

This tutorial takes the DenseNet classification model as an example to implement transfer learning on the CIFAR-10 dataset and explains the usage of MindCV modules in this process.

"},{"location":"tutorials/quick_start/#environment-setting","title":"Environment Setting","text":"

See Installation for details.

"},{"location":"tutorials/quick_start/#data","title":"Data","text":""},{"location":"tutorials/quick_start/#dataset","title":"Dataset","text":"

Through the create_dataset module in mindcv.data, we can quickly load standard or customized datasets.

import os\nfrom mindcv.data import create_dataset, create_transforms, create_loader\n\ncifar10_dir = './datasets/cifar/cifar-10-batches-bin'  # your dataset path\nnum_classes = 10  # num of classes\nnum_workers = 8  # num of parallel workers\n\n# create dataset\ndataset_train = create_dataset(\n    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers\n)\n
"},{"location":"tutorials/quick_start/#transform","title":"Transform","text":"

Through the create_transforms function, you can directly obtain the appropriate data processing and augmentation strategies (transform list) for standard datasets, including the common strategies for CIFAR-10 and ImageNet.

# create transforms\ntrans = create_transforms(dataset_name='cifar10', image_resize=224)\n
"},{"location":"tutorials/quick_start/#loader","title":"Loader","text":"

The mindcv.data.create_loader function is used for data conversion and batch split loading. We need to pass in the transform_list returned by create_transforms.

# Perform data augmentation operations to generate the required dataset.\nloader_train = create_loader(dataset=dataset_train,\n                             batch_size=64,\n                             is_training=True,\n                             num_classes=num_classes,\n                             transform=trans,\n                             num_parallel_workers=num_workers)\n\nnum_batches = loader_train.get_dataset_size()\n

Avoid repeatedly executing the create_loader cell on its own in a notebook; if it needs to be re-run, execute create_dataset again first.
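
For example, a minimal re-execution sketch using the same arguments as above:

# Re-create the dataset first, then rebuild the loader\ndataset_train = create_dataset(\n    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers\n)\nloader_train = create_loader(dataset=dataset_train,\n                             batch_size=64,\n                             is_training=True,\n                             num_classes=num_classes,\n                             transform=trans,\n                             num_parallel_workers=num_workers)\n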

"},{"location":"tutorials/quick_start/#model","title":"Model","text":"

Use the create_model interface to obtain an instantiated DenseNet and load the pretrained weights (obtained by training on the ImageNet dataset).

from mindcv.models import create_model\n\n# instantiate the DenseNet121 model and load the pretraining weights.\nnetwork = create_model(model_name='densenet121', num_classes=num_classes, pretrained=True)\n

Because CIFAR-10 and ImageNet require different numbers of classes, the classifier parameters cannot be shared; the resulting warning that the classifier parameters cannot be loaded does not affect fine-tuning.

"},{"location":"tutorials/quick_start/#loss","title":"Loss","text":"

Obtain the loss function through the create_loss interface.

from mindcv.loss import create_loss\n\nloss = create_loss(name='CE')\n
"},{"location":"tutorials/quick_start/#learning-rate-scheduler","title":"Learning Rate Scheduler","text":"

Use the create_scheduler interface to set up the learning rate scheduler.

from mindcv.scheduler import create_scheduler\n\n# learning rate scheduler\nlr_scheduler = create_scheduler(steps_per_epoch=num_batches,\n                                scheduler='constant',\n                                lr=0.0001)\n
"},{"location":"tutorials/quick_start/#optimizer","title":"Optimizer","text":"

Use the create_optimizer interface to create an optimizer.

from mindcv.optim import create_optimizer\n\n# create optimizer\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler)\n
"},{"location":"tutorials/quick_start/#training","title":"Training","text":"

Use the mindspore.Model interface to encapsulate the network, loss function, optimizer, and metrics passed in by the user into a trainable instance.

from mindspore import Model\n\n# Encapsulates examples that can be trained or inferred\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\n

Use the mindspore.Model.train interface for model training.

from mindspore import LossMonitor, TimeMonitor, CheckpointConfig, ModelCheckpoint\n\n# Set the callback function for saving network parameters during training.\nckpt_save_dir = './ckpt'\nckpt_config = CheckpointConfig(save_checkpoint_steps=num_batches)\nckpt_cb = ModelCheckpoint(prefix='densenet121-cifar10',\n                          directory=ckpt_save_dir,\n                          config=ckpt_config)\n\nmodel.train(5, loader_train, callbacks=[LossMonitor(num_batches//5), TimeMonitor(num_batches//5), ckpt_cb], dataset_sink_mode=False)\n
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:04:30.001.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op5273] don't support int64, reduce precision from int64 to int32.\n\n\nepoch: 1 step: 156, loss is 2.0816354751586914\nepoch: 1 step: 312, loss is 1.4474115371704102\nepoch: 1 step: 468, loss is 0.8935483694076538\nepoch: 1 step: 624, loss is 0.5588696002960205\nepoch: 1 step: 780, loss is 0.3161369860172272\n\n\n[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:09:20.261.851 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op16720] don't support int64, reduce precision from int64 to int32.\n\n\nTrain epoch time: 416429.509 ms, per step time: 532.519 ms\nepoch: 2 step: 154, loss is 0.19752007722854614\nepoch: 2 step: 310, loss is 0.14635677635669708\nepoch: 2 step: 466, loss is 0.3511860966682434\nepoch: 2 step: 622, loss is 0.12542471289634705\nepoch: 2 step: 778, loss is 0.22351759672164917\nTrain epoch time: 156746.872 ms, per step time: 200.444 ms\nepoch: 3 step: 152, loss is 0.08965137600898743\nepoch: 3 step: 308, loss is 0.22765043377876282\nepoch: 3 step: 464, loss is 0.19035443663597107\nepoch: 3 step: 620, loss is 0.06591956317424774\nepoch: 3 step: 776, loss is 0.0934530645608902\nTrain epoch time: 156574.210 ms, per step time: 200.223 ms\nepoch: 4 step: 150, loss is 0.03782692924141884\nepoch: 4 step: 306, loss is 0.023876197636127472\nepoch: 4 step: 462, loss is 0.038690414279699326\nepoch: 4 step: 618, loss is 0.15388774871826172\nepoch: 4 step: 774, loss is 0.1581358164548874\nTrain epoch time: 158398.108 ms, per step time: 202.555 ms\nepoch: 5 step: 148, loss is 0.06556802988052368\nepoch: 5 step: 304, loss is 0.006707251071929932\nepoch: 5 step: 460, loss is 0.02353120595216751\nepoch: 5 step: 616, loss is 0.014183484017848969\nepoch: 5 step: 772, loss is 0.09367241710424423\nTrain epoch time: 154978.618 ms, per step time: 198.182 ms\n
"},{"location":"tutorials/quick_start/#evaluation","title":"Evaluation","text":"

Now, let's evaluate the trained model on the validation set of CIFAR-10.

# Load validation dataset\ndataset_val = create_dataset(\n    name='cifar10', root=cifar10_dir, split='test', shuffle=True, num_parallel_workers=num_workers\n)\n\n# Perform data enhancement operations to generate the required dataset.\nloader_val = create_loader(dataset=dataset_val,\n                           batch_size=64,\n                           is_training=False,\n                           num_classes=num_classes,\n                           transform=trans,\n                           num_parallel_workers=num_workers)\n

Load the fine-tuned parameter file (densenet121-cifar10-5_782.ckpt) into the model.

Encapsulate an inferable instance with the parameters passed in by the user, load the validation dataset, and verify the accuracy of the fine-tuned DenseNet121 model.
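
The checkpoint can be loaded back into the network before evaluation. The following is a minimal, hedged sketch using MindSpore's standard checkpoint utilities; the exact file name under ./ckpt depends on your training run.

# Hedged sketch: load the saved fine-tuning checkpoint into the network\nimport mindspore as ms\n\nparam_dict = ms.load_checkpoint('./ckpt/densenet121-cifar10-5_782.ckpt')\nms.load_param_into_net(network, param_dict)\n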

# Verify the accuracy of DenseNet121 after fine-tune\nacc = model.eval(loader_val, dataset_sink_mode=False)\nprint(acc)\n
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:24:11.927.472 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op24314] don't support int64, reduce precision from int64 to int32.\n\n\n{'accuracy': 0.951}\n\n\n[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:25:01.871.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op27139] don't support int64, reduce precision from int64 to int32.\n
"},{"location":"tutorials/quick_start/#use-yaml-files-for-model-training-and-validation","title":"Use YAML files for model training and validation","text":"

We can also train and validate a model directly through the train.py and validate.py scripts, using a YAML file in which the model parameters are set. The following is an example of training SqueezeNet 1.0 on ImageNet (you need to download ImageNet to the data directory in advance).

For details, please refer to the tutorial.

# standalone training on a CPU/GPU/Ascend device\npython train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --distribute False\n
python validate.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --ckpt_path /path/to/ckpt\n
"},{"location":"zh/","title":"\u4e3b\u9875","text":""},{"location":"zh/#_1","title":"\u7b80\u4ecb","text":"

MindCV\u662f\u4e00\u4e2a\u57fa\u4e8e MindSpore \u5f00\u53d1\u7684\uff0c\u81f4\u529b\u4e8e\u8ba1\u7b97\u673a\u89c6\u89c9\u76f8\u5173\u6280\u672f\u7814\u53d1\u7684\u5f00\u6e90\u5de5\u5177\u7bb1\u3002\u5b83\u63d0\u4f9b\u5927\u91cf\u7684\u8ba1\u7b97\u673a\u89c6\u89c9\u9886\u57df\u7684\u7ecf\u5178\u6a21\u578b\u548cSoTA\u6a21\u578b\u4ee5\u53ca\u5b83\u4eec\u7684\u9884\u8bad\u7ec3\u6743\u91cd\u548c\u8bad\u7ec3\u7b56\u7565\u3002\u540c\u65f6\uff0c\u8fd8\u63d0\u4f9b\u4e86\u81ea\u52a8\u589e\u5f3a\u7b49SoTA\u7b97\u6cd5\u6765\u63d0\u9ad8\u6a21\u578b\u6027\u80fd\u3002\u901a\u8fc7\u89e3\u8026\u7684\u6a21\u5757\u8bbe\u8ba1\uff0c\u60a8\u53ef\u4ee5\u8f7b\u677e\u5730\u5c06MindCV\u5e94\u7528\u5230\u60a8\u81ea\u5df1\u7684CV\u4efb\u52a1\u4e2d\u3002

"},{"location":"zh/#_2","title":"\u4e3b\u8981\u7279\u6027","text":"
  • \u9ad8\u6613\u7528\u6027 MindCV\u5c06\u89c6\u89c9\u4efb\u52a1\u5206\u89e3\u4e3a\u5404\u79cd\u53ef\u914d\u7f6e\u7684\u7ec4\u4ef6\uff0c\u7528\u6237\u53ef\u4ee5\u8f7b\u677e\u5730\u6784\u5efa\u81ea\u5df1\u7684\u6570\u636e\u5904\u7406\u548c\u6a21\u578b\u8bad\u7ec3\u6d41\u7a0b\u3002

    >>> import mindcv\n# \u521b\u5efa\u6570\u636e\u96c6\n>>> dataset = mindcv.create_dataset('cifar10', download=True)\n# \u521b\u5efa\u6a21\u578b\n>>> network = mindcv.create_model('resnet50', pretrained=True)\n

    \u7528\u6237\u53ef\u901a\u8fc7\u9884\u5b9a\u4e49\u7684\u8bad\u7ec3\u548c\u5fae\u8c03\u811a\u672c\uff0c\u5feb\u901f\u914d\u7f6e\u5e76\u5b8c\u6210\u8bad\u7ec3\u6216\u8fc1\u79fb\u5b66\u4e60\u4efb\u52a1\u3002

    # \u914d\u7f6e\u548c\u542f\u52a8\u8fc1\u79fb\u5b66\u4e60\u4efb\u52a1\npython train.py --model swin_tiny --pretrained --opt=adamw --lr=0.001 --data_dir=/path/to/dataset\n
  • \u9ad8\u6027\u80fd MindCV\u96c6\u6210\u4e86\u5927\u91cf\u57fa\u4e8eCNN\u548cTransformer\u7684\u9ad8\u6027\u80fd\u6a21\u578b\uff0c\u5982SwinTransformer\uff0c\u5e76\u63d0\u4f9b\u9884\u8bad\u7ec3\u6743\u91cd\u3001\u8bad\u7ec3\u7b56\u7565\u548c\u6027\u80fd\u62a5\u544a\uff0c\u5e2e\u52a9\u7528\u6237\u5feb\u901f\u9009\u578b\u5e76\u5c06\u5176\u5e94\u7528\u4e8e\u89c6\u89c9\u6a21\u578b\u3002

  • \u7075\u6d3b\u9ad8\u6548 MindCV\u57fa\u4e8e\u9ad8\u6548\u7684\u6df1\u5ea6\u5b66\u4e60\u6846\u67b6MindSpore\u5f00\u53d1\uff0c\u5177\u6709\u81ea\u52a8\u5e76\u884c\u548c\u81ea\u52a8\u5fae\u5206\u7b49\u7279\u6027\uff0c\u652f\u6301\u4e0d\u540c\u786c\u4ef6\u5e73\u53f0\u4e0a\uff08CPU/GPU/Ascend\uff09\uff0c\u540c\u65f6\u652f\u6301\u6548\u7387\u4f18\u5316\u7684\u9759\u6001\u56fe\u6a21\u5f0f\u548c\u8c03\u8bd5\u7075\u6d3b\u7684\u52a8\u6001\u56fe\u6a21\u5f0f\u3002

"},{"location":"zh/#_3","title":"\u6a21\u578b\u652f\u6301","text":"

\u57fa\u4e8eMindCV\u8fdb\u884c\u6a21\u578b\u5b9e\u73b0\u548c\u91cd\u8bad\u7ec3\u7684\u6c47\u603b\u7ed3\u679c\u8be6\u89c1\u6a21\u578b\u4ed3\u5e93, \u6240\u7528\u5230\u7684\u8bad\u7ec3\u7b56\u7565\u548c\u8bad\u7ec3\u540e\u7684\u6a21\u578b\u6743\u91cd\u5747\u53ef\u901a\u8fc7\u8868\u4e2d\u94fe\u63a5\u83b7\u53d6\u3002

\u5404\u6a21\u578b\u8bb2\u89e3\u548c\u8bad\u7ec3\u8bf4\u660e\u8be6\u89c1configs\u76ee\u5f55\u3002

"},{"location":"zh/#_4","title":"\u5b89\u88c5","text":"

\u8be6\u60c5\u8bf7\u89c1\u5b89\u88c5\u9875\u9762\u3002

"},{"location":"zh/#_5","title":"\u5feb\u901f\u5165\u95e8","text":""},{"location":"zh/#_6","title":"\u4e0a\u624b\u6559\u7a0b","text":"

\u5728\u5f00\u59cb\u4e0a\u624bMindCV\u524d\uff0c\u53ef\u4ee5\u9605\u8bfbMindCV\u7684\u5feb\u901f\u5f00\u59cb\uff0c\u8be5\u6559\u7a0b\u53ef\u4ee5\u5e2e\u52a9\u7528\u6237\u5feb\u901f\u4e86\u89e3MindCV\u7684\u5404\u4e2a\u91cd\u8981\u7ec4\u4ef6\u4ee5\u53ca\u8bad\u7ec3\u3001\u9a8c\u8bc1\u3001\u6d4b\u8bd5\u6d41\u7a0b\u3002

\u4ee5\u4e0b\u662f\u4e00\u4e9b\u4f9b\u60a8\u5feb\u901f\u4f53\u9a8c\u7684\u4ee3\u7801\u6837\u4f8b\u3002

>>> import mindcv\n# \u5217\u51fa\u6ee1\u8db3\u6761\u4ef6\u7684\u9884\u8bad\u7ec3\u6a21\u578b\u540d\u79f0\n>>> mindcv.list_models(\"swin*\", pretrained=True)\n['swin_tiny']\n# \u521b\u5efa\u6a21\u578b\n>>> network = mindcv.create_model('swin_tiny', pretrained=True)\n# \u9a8c\u8bc1\u6a21\u578b\u7684\u51c6\u786e\u7387\n>>> !python validate.py --model=swin_tiny --pretrained --dataset=imagenet --val_split=validation\n{'Top_1_Accuracy': 0.80824, 'Top_5_Accuracy': 0.94802, 'loss': 1.7331367141008378}\n
\u56fe\u7247\u5206\u7c7b\u793a\u4f8b

\u53f3\u952e\u70b9\u51fb\u5982\u4e0b\u56fe\u7247\uff0c\u53e6\u5b58\u4e3adog.jpg\u3002

\u4f7f\u7528\u52a0\u8f7d\u4e86\u9884\u8bad\u7ec3\u53c2\u6570\u7684SoTA\u6a21\u578b\u5bf9\u56fe\u7247\u8fdb\u884c\u63a8\u7406\u3002

>>> !python infer.py --model=swin_tiny --image_path='./dog.jpg'\n{'Labrador retriever': 0.5700152, 'golden retriever': 0.034551315, 'kelpie': 0.010108651, 'Chesapeake Bay retriever': 0.008229004, 'Walker hound, Walker foxhound': 0.007791956}\n

\u9884\u6d4b\u7ed3\u679c\u6392\u540d\u524d1\u7684\u662f\u62c9\u5e03\u62c9\u591a\u72ac\uff0c\u6b63\u662f\u8fd9\u5f20\u56fe\u7247\u91cc\u7684\u72d7\u72d7\u7684\u54c1\u79cd\u3002

"},{"location":"zh/#_7","title":"\u6a21\u578b\u8bad\u7ec3","text":"

\u901a\u8fc7train.py\uff0c\u7528\u6237\u53ef\u4ee5\u5f88\u5bb9\u6613\u5730\u5728\u6807\u51c6\u6570\u636e\u96c6\u6216\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u6a21\u578b\uff0c\u7528\u6237\u53ef\u4ee5\u901a\u8fc7\u5916\u90e8\u53d8\u91cf\u6216\u8005yaml\u914d\u7f6e\u6587\u4ef6\u6765\u8bbe\u7f6e\u8bad\u7ec3\u7b56\u7565\uff08\u5982\u6570\u636e\u589e\u5f3a\u3001\u5b66\u4e60\u7387\u7b56\u7565\uff09\u3002

  • \u5355\u5361\u8bad\u7ec3

    # \u5355\u5361\u8bad\u7ec3\npython train.py --model resnet50 --dataset cifar10 --dataset_download\n

    \u4ee5\u4e0a\u4ee3\u7801\u662f\u5728CIFAR10\u6570\u636e\u96c6\u4e0a\u5355\u5361\uff08CPU/GPU/Ascend\uff09\u8bad\u7ec3ResNet\u7684\u793a\u4f8b\uff0c\u901a\u8fc7model\u548cdataset\u53c2\u6570\u5206\u522b\u6307\u5b9a\u9700\u8981\u8bad\u7ec3\u7684\u6a21\u578b\u548c\u6570\u636e\u96c6\u3002

  • \u5206\u5e03\u5f0f\u8bad\u7ec3

    \u5bf9\u4e8e\u50cfImageNet\u8fd9\u6837\u7684\u5927\u578b\u6570\u636e\u96c6\uff0c\u6709\u5fc5\u8981\u5728\u591a\u4e2a\u8bbe\u5907\u4e0a\u4ee5\u5206\u5e03\u5f0f\u6a21\u5f0f\u8fdb\u884c\u8bad\u7ec3\u3002\u57fa\u4e8eMindSpore\u5bf9\u5206\u5e03\u5f0f\u76f8\u5173\u529f\u80fd\u7684\u826f\u597d\u652f\u6301\uff0c\u7528\u6237\u53ef\u4ee5\u4f7f\u7528mpirun\u6765\u8fdb\u884c\u6a21\u578b\u7684\u5206\u5e03\u5f0f\u8bad\u7ec3\u3002

    # \u5206\u5e03\u5f0f\u8bad\u7ec3\n# \u5047\u8bbe\u4f60\u67094\u5f20GPU\u6216\u8005NPU\u5361\nmpirun --allow-run-as-root -n 4 python train.py --distribute \\\n--model densenet121 --dataset imagenet --data_dir ./datasets/imagenet\n

    \u5b8c\u6574\u7684\u53c2\u6570\u5217\u8868\u53ca\u8bf4\u660e\u5728config.py\u4e2d\u5b9a\u4e49\uff0c\u53ef\u8fd0\u884cpython train.py --help\u5feb\u901f\u67e5\u770b\u3002

    \u5982\u9700\u6062\u590d\u8bad\u7ec3\uff0c\u8bf7\u6307\u5b9a--ckpt_path\u548c--ckpt_save_dir\u53c2\u6570\uff0c\u811a\u672c\u5c06\u52a0\u8f7d\u8def\u5f84\u4e2d\u7684\u6a21\u578b\u6743\u91cd\u548c\u4f18\u5316\u5668\u72b6\u6001\uff0c\u5e76\u6062\u590d\u4e2d\u65ad\u7684\u8bad\u7ec3\u8fdb\u7a0b\u3002

  • \u8d85\u53c2\u914d\u7f6e\u548c\u9884\u8bad\u7ec3\u7b56\u7565

    \u60a8\u53ef\u4ee5\u7f16\u5199yaml\u6587\u4ef6\u6216\u8bbe\u7f6e\u5916\u90e8\u53c2\u6570\u6765\u6307\u5b9a\u914d\u7f6e\u6570\u636e\u3001\u6a21\u578b\u3001\u4f18\u5316\u5668\u7b49\u7ec4\u4ef6\u53ca\u5176\u8d85\u53c2\u3002\u4ee5\u4e0b\u662f\u4f7f\u7528\u9884\u8bbe\u7684\u8bad\u7ec3\u7b56\u7565\uff08yaml\u6587\u4ef6\uff09\u8fdb\u884c\u6a21\u578b\u8bad\u7ec3\u7684\u793a\u4f8b\u3002

    mpirun --allow-run-as-root -n 4 python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml\n

    \u9884\u5b9a\u4e49\u7684\u8bad\u7ec3\u7b56\u7565

    MindCV\u76ee\u524d\u63d0\u4f9b\u4e86\u8d85\u8fc720\u79cd\u6a21\u578b\u8bad\u7ec3\u7b56\u7565\uff0c\u5728ImageNet\u53d6\u5f97SoTA\u6027\u80fd\u3002 \u5177\u4f53\u7684\u53c2\u6570\u914d\u7f6e\u548c\u8be6\u7ec6\u7cbe\u5ea6\u6027\u80fd\u6c47\u603b\u8bf7\u89c1configs\u6587\u4ef6\u5939\u3002 \u60a8\u53ef\u4ee5\u4fbf\u6377\u5730\u5c06\u8fd9\u4e9b\u8bad\u7ec3\u7b56\u7565\u7528\u4e8e\u60a8\u7684\u6a21\u578b\u8bad\u7ec3\u4e2d\u4ee5\u63d0\u9ad8\u6027\u80fd\uff08\u590d\u7528\u6216\u4fee\u6539\u76f8\u5e94\u7684yaml\u6587\u4ef6\u5373\u53ef\uff09\u3002

  • \u5728ModelArts/OpenI\u5e73\u53f0\u4e0a\u8bad\u7ec3

    \u5728ModelArts\u6216OpenI\u4e91\u5e73\u53f0\u4e0a\u8fdb\u884c\u8bad\u7ec3\uff0c\u9700\u8981\u6267\u884c\u4ee5\u4e0b\u64cd\u4f5c\uff1a

    1\u3001\u5728\u4e91\u5e73\u53f0\u4e0a\u521b\u5efa\u65b0\u7684\u8bad\u7ec3\u4efb\u52a1\u3002\n2\u3001\u5728\u7f51\u7ad9UI\u754c\u9762\u6dfb\u52a0\u8fd0\u884c\u53c2\u6570`config`\uff0c\u5e76\u6307\u5b9ayaml\u914d\u7f6e\u6587\u4ef6\u7684\u8def\u5f84\u3002\n3\u3001\u5728\u7f51\u7ad9UI\u754c\u9762\u6dfb\u52a0\u8fd0\u884c\u53c2\u6570`enable_modelarts`\u5e76\u8bbe\u7f6e\u4e3aTrue\u3002\n4\u3001\u5728\u7f51\u7ad9\u4e0a\u586b\u5199\u5176\u4ed6\u8bad\u7ec3\u4fe1\u606f\u5e76\u542f\u52a8\u8bad\u7ec3\u4efb\u52a1\u3002\n

\u9759\u6001\u56fe\u548c\u52a8\u6001\u56fe\u6a21\u5f0f

\u5728\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0c\u6a21\u578b\u8bad\u7ec3\uff08train.py\uff09\u5728MindSpore\u4e0a\u4ee5\u56fe\u6a21\u5f0f \u8fd0\u884c\uff0c\u8be5\u6a21\u5f0f\u5bf9\u4f7f\u7528\u9759\u6001\u56fe\u7f16\u8bd1\u5bf9\u6027\u80fd\u548c\u5e76\u884c\u8ba1\u7b97\u8fdb\u884c\u4e86\u4f18\u5316\u3002 \u76f8\u6bd4\u4e4b\u4e0b\uff0cpynative\u6a21\u5f0f\u7684\u4f18\u52bf\u5728\u4e8e\u7075\u6d3b\u6027\u548c\u6613\u4e8e\u8c03\u8bd5\u3002\u4e3a\u4e86\u65b9\u4fbf\u8c03\u8bd5\uff0c\u60a8\u53ef\u4ee5\u5c06\u53c2\u6570--mode\u8bbe\u4e3a1\u4ee5\u5c06\u8fd0\u884c\u6a21\u5f0f\u8bbe\u7f6e\u4e3a\u8c03\u8bd5\u6a21\u5f0f\u3002

\u6df7\u5408\u6a21\u5f0f

\u57fa\u4e8emindspore.jit\u7684\u6df7\u5408\u6a21\u5f0f \u662f\u517c\u987e\u4e86MindSpore\u7684\u6548\u7387\u548c\u7075\u6d3b\u7684\u6df7\u5408\u6a21\u5f0f\u3002\u7528\u6237\u53ef\u901a\u8fc7\u4f7f\u7528train_with_func.py\u6587\u4ef6\u6765\u4f7f\u7528\u8be5\u6df7\u5408\u6a21\u5f0f\u8fdb\u884c\u8bad\u7ec3\u3002

python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10\n

\u6ce8\uff1a\u6b64\u4e3a\u8bd5\u9a8c\u6027\u8d28\u7684\u8bad\u7ec3\u811a\u672c\uff0c\u4ecd\u5728\u6539\u8fdb\uff0c\u5728MindSpore 1.8.1\u6216\u66f4\u65e9\u7248\u672c\u4e0a\u4f7f\u7528\u6b64\u6a21\u5f0f\u76ee\u524d\u5e76\u4e0d\u7a33\u5b9a\u3002

"},{"location":"zh/#_8","title":"\u6a21\u578b\u9a8c\u8bc1","text":"

\u4f7f\u7528validate.py\u53ef\u4ee5\u4fbf\u6377\u5730\u9a8c\u8bc1\u8bad\u7ec3\u597d\u7684\u6a21\u578b\u3002

# \u9a8c\u8bc1\u6a21\u578b\npython validate.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/data --ckpt_path=/path/to/model.ckpt\n

\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u8fdb\u884c\u9a8c\u8bc1

\u5f53\u9700\u8981\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\uff0c\u8ddf\u8e2a\u6a21\u578b\u5728\u6d4b\u8bd5\u96c6\u4e0a\u7cbe\u5ea6\u7684\u53d8\u5316\u65f6\uff0c\u8bf7\u542f\u7528\u53c2\u6570--val_while_train\uff0c\u5982\u4e0b

python train.py --model=resnet50 --dataset=cifar10 \\\n--val_while_train --val_split=test --val_interval=1\n

\u5404\u8f6e\u6b21\u7684\u8bad\u7ec3\u635f\u5931\u548c\u6d4b\u8bd5\u7cbe\u5ea6\u5c06\u4fdd\u5b58\u5728{ckpt_save_dir}/results.log\u4e2d\u3002

\u66f4\u591a\u8bad\u7ec3\u548c\u9a8c\u8bc1\u7684\u793a\u4f8b\u8bf7\u89c1\u793a\u4f8b\u3002

"},{"location":"zh/#_9","title":"\u6559\u7a0b","text":"

\u6211\u4eec\u63d0\u4f9b\u4e86\u7cfb\u5217\u6559\u7a0b\uff0c\u5e2e\u52a9\u7528\u6237\u5b66\u4e60\u5982\u4f55\u4f7f\u7528MindCV.

  • \u4e86\u89e3\u6a21\u578b\u914d\u7f6e
  • \u6a21\u578b\u63a8\u7406
  • \u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u4e0a\u7684\u6a21\u578b\u5fae\u8c03\u8bad\u7ec3
  • \u5982\u4f55\u81ea\u5b9a\u4e49\u6a21\u578b //coming soon
  • \u89c6\u89c9transformer\u6027\u80fd\u4f18\u5316 //coming soon
  • \u90e8\u7f72\u63a8\u7406\u670d\u52a1
"},{"location":"zh/#_10","title":"\u652f\u6301\u7b97\u6cd5","text":"\u652f\u6301\u7b97\u6cd5\u5217\u8868
  • \u6570\u636e\u589e\u5f3a
    • AutoAugment
    • RandAugment
    • Repeated Augmentation
    • RandErasing (Cutout)
    • CutMix
    • MixUp
    • RandomResizeCrop
    • Color Jitter, Flip, etc
  • \u4f18\u5316\u5668
    • Adam
    • AdamW
    • Lion
    • Adan (experimental)
    • AdaGrad
    • LAMB
    • Momentum
    • RMSProp
    • SGD
    • NAdam
  • \u5b66\u4e60\u7387\u8c03\u5ea6\u5668
    • Warmup Cosine Decay
    • Step LR
    • Polynomial Decay
    • Exponential Decay
  • \u6b63\u5219\u5316
    • Weight Decay
    • Label Smoothing
    • Stochastic Depth (depends on networks)
    • Dropout (depends on networks)
  • \u635f\u5931\u51fd\u6570
    • Cross Entropy (w/ class weight and auxiliary logit support)
    • Binary Cross Entropy (w/ class weight and auxiliary logit support)
    • Soft Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • Soft Binary Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
  • \u6a21\u578b\u878d\u5408
    • Warmup EMA (Exponential Moving Average)
"},{"location":"zh/#_11","title":"\u8d21\u732e\u65b9\u5f0f","text":"

\u6b22\u8fce\u5f00\u53d1\u8005\u7528\u6237\u63d0issue\u6216\u63d0\u4ea4\u4ee3\u7801PR\uff0c\u6216\u8d21\u732e\u66f4\u591a\u7684\u7b97\u6cd5\u548c\u6a21\u578b\uff0c\u4e00\u8d77\u8ba9MindCV\u53d8\u5f97\u66f4\u597d\u3002

\u6709\u5173\u8d21\u732e\u6307\u5357\uff0c\u8bf7\u53c2\u9605\u8d21\u732e\u3002 \u8bf7\u9075\u5faa\u6a21\u578b\u7f16\u5199\u6307\u5357\u6240\u89c4\u5b9a\u7684\u89c4\u5219\u6765\u8d21\u732e\u6a21\u578b\u63a5\u53e3\uff1a)

"},{"location":"zh/#_12","title":"\u8bb8\u53ef\u8bc1","text":"

\u672c\u9879\u76ee\u9075\u5faaApache License 2.0\u5f00\u6e90\u534f\u8bae\u3002

"},{"location":"zh/#_13","title":"\u81f4\u8c22","text":"

MindCV\u662f\u7531MindSpore\u56e2\u961f\u3001\u897f\u5b89\u7535\u5b50\u79d1\u6280\u5927\u5b66\u3001\u897f\u5b89\u4ea4\u901a\u5927\u5b66\u8054\u5408\u5f00\u53d1\u7684\u5f00\u6e90\u9879\u76ee\u3002 \u8877\u5fc3\u611f\u8c22\u6240\u6709\u53c2\u4e0e\u7684\u7814\u7a76\u4eba\u5458\u548c\u5f00\u53d1\u4eba\u5458\u4e3a\u8fd9\u4e2a\u9879\u76ee\u6240\u4ed8\u51fa\u7684\u52aa\u529b\u3002 \u5341\u5206\u611f\u8c22 OpenI \u5e73\u53f0\u6240\u63d0\u4f9b\u7684\u7b97\u529b\u8d44\u6e90\u3002

"},{"location":"zh/#_14","title":"\u5f15\u7528","text":"

\u5982\u679c\u4f60\u89c9\u5f97MindCV\u5bf9\u4f60\u7684\u9879\u76ee\u6709\u5e2e\u52a9\uff0c\u8bf7\u8003\u8651\u5f15\u7528\uff1a

@misc{MindSpore Computer Vision 2022,\n    title={{MindSpore Computer  Vision}:MindSpore Computer Vision Toolbox and Benchmark},\n    author={MindSpore Vision Contributors},\n    howpublished = {\\url{https://github.com/mindspore-lab/mindcv/}},\n    year={2022}\n}\n
"},{"location":"zh/installation/","title":"\u5b89\u88c5","text":""},{"location":"zh/installation/#_1","title":"\u4f9d\u8d56","text":"
  • mindspore >= 1.8.1
  • numpy >= 1.17.0
  • pyyaml >= 5.3
  • tqdm
  • openmpi 4.0.3 (\u5206\u5e03\u5f0f\u8bad\u7ec3\u6240\u9700)

\u4e3a\u4e86\u5b89\u88c5python\u76f8\u5173\u5e93\u4f9d\u8d56\uff0c\u53ea\u9700\u8fd0\u884c\uff1a

pip install -r requirements.txt\n

Tip

MindSpore\u53ef\u4ee5\u901a\u8fc7\u9075\u5faa\u5b98\u65b9\u6307\u5f15\uff0c\u5728\u4e0d\u540c\u7684\u786c\u4ef6\u5e73\u53f0\u4e0a\u83b7\u5f97\u6700\u4f18\u7684\u5b89\u88c5\u4f53\u9a8c\u3002 \u4e3a\u4e86\u5728\u5206\u5e03\u5f0f\u6a21\u5f0f\u4e0b\u8fd0\u884c\uff0c\u60a8\u8fd8\u9700\u8981\u5b89\u88c5OpenMPI\u3002

\u5982\u4e0b\u7684\u6307\u5f15\u5047\u8bbe\u60a8\u5df2\u7ecf\u5b8c\u6210\u4e86\u6240\u6709\u4f9d\u8d56\u5e93\u7684\u5b89\u88c5\u3002

"},{"location":"zh/installation/#pypi","title":"PyPI\u6e90\u5b89\u88c5","text":"

MindCV\u88ab\u53d1\u5e03\u4e3a\u4e00\u4e2aPython\u5305\u5e76\u80fd\u591f\u901a\u8fc7pip\u8fdb\u884c\u5b89\u88c5\u3002\u6211\u4eec\u63a8\u8350\u60a8\u5728\u865a\u62df\u73af\u5883\u5b89\u88c5\u4f7f\u7528\u3002 \u6253\u5f00\u7ec8\u7aef\uff0c\u8f93\u5165\u4ee5\u4e0b\u6307\u4ee4\u6765\u5b89\u88c5MindCV:

stablenightly
pip install mindcv\n
# \u6682\u4e0d\u652f\u6301\n

\u4e0a\u8ff0\u547d\u4ee4\u4f1a\u81ea\u52a8\u5b89\u88c5\u4f9d\u8d56\uff1aNumPy\uff0cPyYAML \u548c tqdm\u7684\u517c\u5bb9\u7248\u672c\u3002

Tip

\u5982\u679c\u60a8\u4e4b\u524d\u6ca1\u6709\u4f7f\u7528 Python \u7684\u7ecf\u9a8c\uff0c\u6211\u4eec\u5efa\u8bae\u60a8\u9605\u8bfb\u4f7f\u7528Python\u7684pip\u6765\u7ba1\u7406\u60a8\u7684\u9879\u76ee\u7684\u4f9d\u8d56\u5173\u7cfb\uff0c \u8fd9\u662f\u5bf9 Python \u5305\u7ba1\u7406\u673a\u5236\u7684\u4e00\u4e2a\u5f88\u597d\u7684\u4ecb\u7ecd\uff0c\u5e76\u4e14\u53ef\u4ee5\u5e2e\u52a9\u60a8\u5728\u9047\u5230\u9519\u8bef\u65f6\u8fdb\u884c\u6545\u969c\u6392\u9664\u3002

Warning

\u4e0a\u8ff0\u547d\u4ee4 \u4e0d\u4f1a \u5b89\u88c5MindSpore. \u6211\u4eec\u5f3a\u70c8\u63a8\u8350\u60a8\u901a\u8fc7\u5b98\u65b9\u6307\u5f15\u6765\u5b89\u88c5MindSpore\u3002

"},{"location":"zh/installation/#_2","title":"\u6e90\u7801\u5b89\u88c5 (\u672a\u7ecf\u6d4b\u8bd5\u7248\u672c)","text":""},{"location":"zh/installation/#from-vsc","title":"from VSC","text":"
pip install git+https://github.com/mindspore-lab/mindcv.git\n
"},{"location":"zh/installation/#from-local-src","title":"from local src","text":"

Tip

\u7531\u4e8e\u672c\u9879\u76ee\u5904\u4e8e\u6d3b\u8dc3\u5f00\u53d1\u9636\u6bb5\uff0c\u5982\u679c\u60a8\u662f\u5f00\u53d1\u8005\u6216\u8005\u8d21\u732e\u8005\uff0c\u8bf7\u4f18\u5148\u9009\u62e9\u6b64\u5b89\u88c5\u65b9\u5f0f\u3002

MindCV\u53ef\u4ee5\u5728\u7531 GitHub \u514b\u9686\u4ed3\u5e93\u5230\u672c\u5730\u6587\u4ef6\u5939\u540e\u76f4\u63a5\u4f7f\u7528\u3002 \u8fd9\u5bf9\u4e8e\u60f3\u4f7f\u7528\u6700\u65b0\u7248\u672c\u7684\u5f00\u53d1\u8005\u5341\u5206\u65b9\u4fbf:

git clone https://github.com/mindspore-lab/mindcv.git\n

\u5728\u514b\u9686\u5230\u672c\u5730\u4e4b\u540e\uff0c\u63a8\u8350\u60a8\u4f7f\u7528\"\u53ef\u7f16\u8f91\"\u6a21\u5f0f\u8fdb\u884c\u5b89\u88c5\uff0c\u8fd9\u6709\u52a9\u4e8e\u89e3\u51b3\u6f5c\u5728\u7684\u6a21\u5757\u5bfc\u5165\u95ee\u9898\u3002

cd mindcv\npip install -e .\n
"},{"location":"zh/modelzoo/","title":"\u6a21\u578b\u4ed3\u5e93","text":"Model Context Top-1 (%) Top-5 (%) Params(M) Recipe Download BiT_resnet50 D910x8-G 76.81 93.17 25.55 yaml weights BiT_resnet50x3 D910x8-G 80.63 95.12 217.31 yaml weights BiT_resnet101 D910x8-G 77.93 93.75 44.54 yaml weights coat_lite_tiny D910x8-G 77.35 93.43 5.72 yaml weights coat_lite_mini D910x8-G 78.51 93.84 11.01 yaml weights coat_tiny D910x8-G 79.67 94.88 5.50 yaml weights coat_mini D910x8-G 81.08 95.34 10.34 yaml weights convit_tiny D910x8-G 73.66 91.72 5.71 yaml weights convit_tiny_plus D910x8-G 77.00 93.60 9.97 yaml weights convit_small D910x8-G 81.63 95.59 27.78 yaml weights convit_small_plus D910x8-G 81.80 95.42 48.98 yaml weights convit_base D910x8-G 82.10 95.52 86.54 yaml weights convit_base_plus D910x8-G 81.96 95.04 153.13 yaml weights convnext_tiny D910x64-G 81.91 95.79 28.59 yaml weights convnext_small D910x64-G 83.40 96.36 50.22 yaml weights convnext_base D910x64-G 83.32 96.24 88.59 yaml weights convnextv2_tiny D910x8-G 82.43 95.98 28.64 yaml weights crossvit_9 D910x8-G 73.56 91.79 8.55 yaml weights crossvit_15 D910x8-G 81.08 95.33 27.27 yaml weights crossvit_18 D910x8-G 81.93 95.75 43.27 yaml weights densenet121 D910x8-G 75.64 92.84 8.06 yaml weights densenet161 D910x8-G 79.09 94.66 28.90 yaml weights densenet169 D910x8-G 77.26 93.71 14.31 yaml weights densenet201 D910x8-G 78.14 94.08 20.24 yaml weights dpn92 D910x8-G 79.46 94.49 37.79 yaml weights dpn98 D910x8-G 79.94 94.57 61.74 yaml weights dpn107 D910x8-G 80.05 94.74 87.13 yaml weights dpn131 D910x8-G 80.07 94.72 79.48 yaml weights edgenext_xx_small D910x8-G 71.02 89.99 1.33 yaml weights edgenext_x_small D910x8-G 75.14 92.50 2.34 yaml weights edgenext_small D910x8-G 79.15 94.39 5.59 yaml weights edgenext_base D910x8-G 82.24 95.94 18.51 yaml weights efficientnet_b0 D910x64-G 76.89 93.16 5.33 yaml weights efficientnet_b1 D910x64-G 78.95 94.34 7.86 yaml weights ghostnet_050 D910x8-G 66.03 86.64 2.60 yaml weights ghostnet_100 D910x8-G 73.78 91.66 5.20 yaml weights ghostnet_130 D910x8-G 75.50 92.56 7.39 yaml weights googlenet D910x8-G 72.68 90.89 6.99 yaml weights hrnet_w32 D910x8-G 80.64 95.44 41.30 yaml weights hrnet_w48 D910x8-G 81.19 95.69 77.57 yaml weights inception_v3 D910x8-G 79.11 94.40 27.20 yaml weights inception_v4 D910x8-G 80.88 95.34 42.74 yaml weights mixnet_s D910x8-G 75.52 92.52 4.17 yaml weights mixnet_m D910x8-G 76.64 93.05 5.06 yaml weights mixnet_l D910x8-G 78.73 94.31 7.38 yaml weights mnasnet_050 D910x8-G 68.07 88.09 2.14 yaml weights mnasnet_075 D910x8-G 71.81 90.53 3.20 yaml weights mnasnet_100 D910x8-G 74.28 91.70 4.42 yaml weights mnasnet_130 D910x8-G 75.65 92.64 6.33 yaml weights mnasnet_140 D910x8-G 76.01 92.83 7.16 yaml weights mobilenet_v1_025 D910x8-G 53.87 77.66 0.47 yaml weights mobilenet_v1_050 D910x8-G 65.94 86.51 1.34 yaml weights mobilenet_v1_075 D910x8-G 70.44 89.49 2.60 yaml weights mobilenet_v1_100 D910x8-G 72.95 91.01 4.25 yaml weights mobilenet_v2_075 D910x8-G 69.98 89.32 2.66 yaml weights mobilenet_v2_100 D910x8-G 72.27 90.72 3.54 yaml weights mobilenet_v2_140 D910x8-G 75.56 92.56 6.15 yaml weights mobilenet_v3_small_100 D910x8-G 68.10 87.86 2.55 yaml weights mobilenet_v3_large_100 D910x8-G 75.23 92.31 5.51 yaml weights mobilevit_xx_small D910x8-G 68.91 88.91 1.27 yaml weights mobilevit_x_small D910x8-G 74.99 92.32 2.32 yaml weights mobilevit_small D910x8-G 78.47 94.18 5.59 yaml weights nasnet_a_4x1056 D910x8-G 73.65 91.25 5.33 yaml weights pit_ti D910x8-G 72.96 91.33 4.85 yaml weights pit_xs D910x8-G 78.41 
94.06 10.61 yaml weights pit_s D910x8-G 80.56 94.80 23.46 yaml weights pit_b D910x8-G 81.87 95.04 73.76 yaml weights poolformer_s12 D910x8-G 77.33 93.34 11.92 yaml weights pvt_tiny D910x8-G 74.81 92.18 13.23 yaml weights pvt_small D910x8-G 79.66 94.71 24.49 yaml weights pvt_medium D910x8-G 81.82 95.81 44.21 yaml weights pvt_large D910x8-G 81.75 95.70 61.36 yaml weights pvt_v2_b0 D910x8-G 71.50 90.60 3.67 yaml weights pvt_v2_b1 D910x8-G 78.91 94.49 14.01 yaml weights pvt_v2_b2 D910x8-G 81.99 95.74 25.35 yaml weights pvt_v2_b3 D910x8-G 82.84 96.24 45.24 yaml weights pvt_v2_b4 D910x8-G 83.14 96.27 62.56 yaml weights regnet_x_200mf D910x8-G 68.74 88.38 2.68 yaml weights regnet_x_400mf D910x8-G 73.16 91.35 5.16 yaml weights regnet_x_600mf D910x8-G 74.34 92.00 6.20 yaml weights regnet_x_800mf D910x8-G 76.04 92.97 7.26 yaml weights regnet_y_200mf D910x8-G 70.30 89.61 3.16 yaml weights regnet_y_400mf D910x8-G 73.91 91.84 4.34 yaml weights regnet_y_600mf D910x8-G 75.69 92.50 6.06 yaml weights regnet_y_800mf D910x8-G 76.52 93.10 6.26 yaml weights regnet_y_16gf D910x8-G 82.92 96.29 83.71 yaml weights repmlp_t224 D910x8-G 76.71 93.30 38.30 yaml weights repvgg_a0 D910x8-G 72.19 90.75 9.13 yaml weights repvgg_a1 D910x8-G 74.19 91.89 14.12 yaml weights repvgg_a2 D910x8-G 76.63 93.42 28.25 yaml weights repvgg_b0 D910x8-G 74.99 92.40 15.85 yaml weights repvgg_b1 D910x8-G 78.81 94.37 57.48 yaml weights repvgg_b2 D910x64-G 79.29 94.66 89.11 yaml weights repvgg_b3 D910x64-G 80.46 95.34 123.19 yaml weights repvgg_b1g2 D910x8-G 78.03 94.09 45.85 yaml weights repvgg_b1g4 D910x8-G 77.64 94.03 40.03 yaml weights repvgg_b2g4 D910x8-G 78.8 94.36 61.84 yaml weights res2net50 D910x8-G 79.35 94.64 25.76 yaml weights res2net101 D910x8-G 79.56 94.70 45.33 yaml weights res2net50_v1b D910x8-G 80.32 95.09 25.77 yaml weights res2net101_v1b D910x8-G 81.14 95.41 45.35 yaml weights resnest50 D910x8-G 80.81 95.16 27.55 yaml weights resnest101 D910x8-G 82.90 96.12 48.41 yaml weights resnet18 D910x8-G 70.21 89.62 11.70 yaml weights resnet34 D910x8-G 74.15 91.98 21.81 yaml weights resnet50 D910x8-G 76.69 93.50 25.61 yaml weights resnet101 D910x8-G 78.24 94.09 44.65 yaml weights resnet152 D910x8-G 78.72 94.45 60.34 yaml weights resnetv2_50 D910x8-G 76.90 93.37 25.60 yaml weights resnetv2_101 D910x8-G 78.48 94.23 44.55 yaml weights resnext50_32x4d D910x8-G 78.53 94.10 25.10 yaml weights resnext101_32x4d D910x8-G 79.83 94.80 44.32 yaml weights resnext101_64x4d D910x8-G 80.30 94.82 83.66 yaml weights resnext152_64x4d D910x8-G 80.52 95.00 115.27 yaml weights rexnet_09 D910x8-G 77.06 93.41 4.13 yaml weights rexnet_10 D910x8-G 77.38 93.60 4.84 yaml weights rexnet_13 D910x8-G 79.06 94.28 7.61 yaml weights rexnet_15 D910x8-G 79.95 94.74 9.79 yaml weights rexnet_20 D910x8-G 80.64 94.99 16.45 yaml weights seresnet18 D910x8-G 71.81 90.49 11.80 yaml weights seresnet34 D910x8-G 75.38 92.50 21.98 yaml weights seresnet50 D910x8-G 78.32 94.07 28.14 yaml weights seresnext26_32x4d D910x8-G 77.17 93.42 16.83 yaml weights seresnext50_32x4d D910x8-G 78.71 94.36 27.63 yaml weights shufflenet_v1_g3_05 D910x8-G 57.05 79.73 0.73 yaml weights shufflenet_v1_g3_10 D910x8-G 67.77 87.73 1.89 yaml weights shufflenet_v2_x0_5 D910x8-G 60.53 82.11 1.37 yaml weights shufflenet_v2_x1_0 D910x8-G 69.47 88.88 2.29 yaml weights shufflenet_v2_x1_5 D910x8-G 72.79 90.93 3.53 yaml weights shufflenet_v2_x2_0 D910x8-G 75.07 92.08 7.44 yaml weights skresnet18 D910x8-G 73.09 91.20 11.97 yaml weights skresnet34 D910x8-G 76.71 93.10 22.31 yaml weights skresnext50_32x4d D910x8-G 
79.08 94.60 37.31 yaml weights squeezenet1_0 D910x8-G 59.01 81.01 1.25 yaml weights squeezenet1_0 GPUx8-G 58.83 81.08 1.25 yaml weights squeezenet1_1 D910x8-G 58.44 80.84 1.24 yaml weights squeezenet1_1 GPUx8-G 59.18 81.41 1.24 yaml weights swin_tiny D910x8-G 80.82 94.80 33.38 yaml weights swinv2_tiny_window8 D910x8-G 81.42 95.43 28.78 yaml weights vgg11 D910x8-G 71.86 90.50 132.86 yaml weights vgg13 D910x8-G 72.87 91.02 133.04 yaml weights vgg16 D910x8-G 74.61 91.87 138.35 yaml weights vgg19 D910x8-G 75.21 92.56 143.66 yaml weights visformer_tiny D910x8-G 78.28 94.15 10.33 yaml weights visformer_tiny_v2 D910x8-G 78.82 94.41 9.38 yaml weights visformer_small D910x8-G 81.76 95.88 40.25 yaml weights visformer_small_v2 D910x8-G 82.17 95.90 23.52 yaml weights vit_b_32_224 D910x8-G 75.86 92.08 87.46 yaml weights vit_l_16_224 D910x8-G 76.34 92.79 303.31 yaml weights vit_l_32_224 D910x8-G 73.71 90.92 305.52 yaml weights volo_d1 D910x8-G 82.59 95.99 27 yaml weights xception D910x8-G 79.01 94.25 22.91 yaml weights xcit_tiny_12_p16_224 D910x8-G 77.67 93.79 7.00 yaml weights"},{"location":"zh/how_to_guides/write_a_new_model/","title":"\u6a21\u578b\u7f16\u5199\u6307\u5357","text":"

\u672c\u6587\u6863\u63d0\u4f9b\u4e86\u7f16\u5199MindSpore\u5957\u4ef6\u4e2d\u7684\u6a21\u578b\u5b9a\u4e49\u6587\u4ef6model.py\u7684\u53c2\u8003\u6a21\u677f\uff0c\u65e8\u5728\u63d0\u4f9b\u4e00\u79cd\u7edf\u4e00\u7684\u4ee3\u7801\u98ce\u683c\u3002

\u63a5\u4e0b\u6765\u6211\u4eec\u4ee5\u76f8\u5bf9\u7b80\u5355\u7684\u65b0\u6a21\u578bMLP-Mixer\u4f5c\u4e3a\u793a\u4f8b\u3002

"},{"location":"zh/how_to_guides/write_a_new_model/#_2","title":"\u6587\u4ef6\u5934","text":"

\u8be5\u6587\u4ef6\u7684**\u7b80\u8981\u63cf\u8ff0**\u3002\u5305\u542b\u6a21\u578b\u540d\u79f0\u548c\u8bba\u6587\u9898\u76ee\u3002\u5982\u4e0b\u6240\u793a\uff1a

\"\"\"\nMindSpore implementation of `${MODEL_NAME}`.\nRefer to ${PAPER_NAME}.\n\"\"\"\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_3","title":"\u6a21\u5757\u5bfc\u5165","text":"

\u6a21\u5757\u5bfc\u5165\u5206\u4e3a\u4e09\u79cd\u7c7b\u578b\u3002\u5206\u522b\u4e3a

  • Python\u539f\u751f\u6216\u7b2c\u4e09\u65b9\u5e93\u3002\u5982import math\u3001import numpy as np\u7b49\u7b49\u3002\u5e94\u5f53\u653e\u5728\u7b2c\u4e00\u68af\u961f\u3002
  • MindSpore\u76f8\u5173\u6a21\u5757\u3002\u5982import mindspore.nn as nn\u3001import mindspore.ops as ops\u7b49\u7b49\u3002\u5e94\u5f53\u653e\u5728\u7b2c\u4e8c\u68af\u961f\u3002
  • \u5957\u4ef6\u5305\u5185\u6a21\u5757\u3002\u5982from .layers.classifier import ClassifierHead\u7b49\u7b49\u3002\u5e94\u5f53\u653e\u5728\u7b2c\u4e09\u68af\u961f\uff0c\u5e76\u4f7f\u7528\u76f8\u5bf9\u5bfc\u5165\u3002

\u793a\u4f8b\u5982\u4e0b\uff1a

import math\nfrom collections import OrderedDict\n\nimport mindspore.nn as nn\nimport mindspore.ops as ops\nimport mindspore.common.initializer as init\n\nfrom .utils import load_pretrained\nfrom .layers.classifier import ClassifierHead\n

\u4ec5\u5bfc\u5165\u5fc5\u987b\u7684\u6a21\u5757\u6216\u5305\uff0c\u907f\u514d\u5bfc\u5165\u65e0\u7528\u5305\u3002

"},{"location":"zh/how_to_guides/write_a_new_model/#__all__","title":"__all__","text":"

Python \u6ca1\u6709\u539f\u751f\u7684\u53ef\u89c1\u6027\u63a7\u5236\uff0c\u5176\u53ef\u89c1\u6027\u7684\u7ef4\u62a4\u662f\u9760\u4e00\u5957\u9700\u8981\u5927\u5bb6\u81ea\u89c9\u9075\u5b88\u7684\u201c\u7ea6\u5b9a\u201d\u3002__all__ \u662f\u9488\u5bf9\u6a21\u5757\u516c\u5f00\u63a5\u53e3\u7684\u4e00\u79cd\u7ea6\u5b9a\uff0c\u4ee5\u63d0\u4f9b\u4e86\u201d\u767d\u540d\u5355\u201c\u7684\u5f62\u5f0f\u66b4\u9732\u63a5\u53e3\u3002\u5982\u679c\u5b9a\u4e49\u4e86__all__\uff0c\u5176\u4ed6\u6587\u4ef6\u4e2d\u4f7f\u7528from xxx import *\u5bfc\u5165\u8be5\u6587\u4ef6\u65f6\uff0c\u53ea\u4f1a\u5bfc\u5165__all__\u5217\u51fa\u7684\u6210\u5458\uff0c\u53ef\u4ee5\u5176\u4ed6\u6210\u5458\u90fd\u88ab\u6392\u9664\u5728\u5916\u3002

\u6211\u4eec\u7ea6\u5b9a\u6a21\u578b\u4e2d\u5bf9\u5916\u66b4\u9732\u7684\u63a5\u53e3\u5305\u62ec\u4e3b\u6a21\u578b\u7c7b\u4ee5\u53ca\u8fd4\u56de\u4e0d\u540c\u89c4\u683c\u6a21\u578b\u7684\u51fd\u6570\uff0c\u4f8b\u5982\uff1a

__all__ = [\n    \"MLPMixer\",\n    \"mlp_mixer_s_p32\",\n    \"mlp_mixer_s_p16\",\n    ...\n]\n

\u5176\u4e2d\"MLPMixer\"\u662f\u4e3b\u6a21\u578b\u7c7b\uff0c\"mlp_mixer_s_p32\"\u548c\"mlp_mixer_s_p16\"\u7b49\u662f\u8fd4\u56de\u4e0d\u540c\u89c4\u683c\u6a21\u578b\u7684\u51fd\u6570\u3002\u4e00\u822c\u6765\u8bf4\u5b50\u6a21\u578b\uff0c\u5373\u67d0Layer\u6216\u67d0Block\u662f\u4e0d\u5e94\u8be5\u88ab\u5176\u4ed6\u6587\u4ef6\u6240\u5171\u7528\u7684\u3002\u5982\u82e5\u6b64\uff0c\u5e94\u5f53\u8003\u8651\u5c06\u8be5\u5b50\u6a21\u578b\u63d0\u53d6\u5230${MINDCLS}/models/layers\u4e0b\u9762\u4f5c\u4e3a\u516c\u7528\u6a21\u5757\uff0c\u5982SEBlock\u7b49\u3002

"},{"location":"zh/how_to_guides/write_a_new_model/#_4","title":"\u5b50\u6a21\u578b","text":"

\u6211\u4eec\u90fd\u77e5\u9053\u4e00\u4e2a\u6df1\u5ea6\u6a21\u578b\u662f\u7531\u591a\u5c42\u7ec4\u6210\u7684\u7f51\u7edc\u3002\u5176\u4e2d\u67d0\u4e9b\u5c42\u53ef\u4ee5\u7ec4\u6210\u76f8\u540c\u62d3\u6251\u7ed3\u6784\u7684\u5b50\u6a21\u578b\uff0c\u6211\u4eec\u4e00\u822c\u79f0\u5176\u4e3aLayer\u6216\u8005Block\uff0c\u4f8b\u5982ResidualBlock\u7b49\u3002\u8fd9\u79cd\u62bd\u8c61\u6709\u5229\u4e8e\u6211\u4eec\u7406\u89e3\u6574\u4e2a\u6a21\u578b\u7ed3\u6784\uff0c\u4e5f\u6709\u5229\u4e8e\u4ee3\u7801\u7684\u7f16\u5199\u3002

\u6211\u4eec\u5e94\u5f53\u901a\u8fc7\u7c7b\u6ce8\u91ca\u5bf9\u5b50\u6a21\u578b\u8fdb\u884c\u529f\u80fd\u7684\u7b80\u8981\u63cf\u8ff0\u3002\u5728MindSpore\u4e2d\uff0c\u6a21\u578b\u7684\u7c7b\u7ee7\u627f\u4e8enn.Cell\uff0c\u4e00\u822c\u6765\u8bf4\u6211\u4eec\u9700\u8981\u91cd\u8f7d\u4ee5\u4e0b\u4e24\u4e2a\u51fd\u6570\uff1a

  • \u5728__init__\u51fd\u6570\u4e2d\uff0c\u6211\u4eec\u5e94\u5f53\u5b9a\u4e49\u6a21\u578b\u4e2d\u9700\u8981\u7528\u5230\u7684\u795e\u7ecf\u7f51\u7edc\u5c42\uff08__init__\u4e2d\u7684\u53c2\u6570\u8981\u8fdb\u884c\u53c2\u6570\u7c7b\u578b\u58f0\u660e\uff0c\u5373type hint\uff09\u3002
  • \u5728construct\u51fd\u6570\u4e2d\u6211\u4eec\u5b9a\u4e49\u6a21\u578b\u524d\u5411\u903b\u8f91\u3002

\u793a\u4f8b\u5982\u4e0b\uff1a

class MixerBlock(nn.Cell):\n\"\"\"Mixer Layer with token-mixing MLP and channel-mixing MLP\"\"\"\n\n    def __init__(self,\n                 n_patches: int,\n                 n_channels: int,\n                 token_dim: int,\n                 channel_dim: int,\n                 dropout: float = 0.\n                 ) -> None:\n        super().__init__()\n        self.token_mix = nn.SequentialCell(\n            nn.LayerNorm((n_channels,)),\n            TransPose((0, 2, 1)),\n            FeedForward(n_patches, token_dim, dropout),\n            TransPose((0, 2, 1))\n        )\n        self.channel_mix = nn.SequentialCell(\n            nn.LayerNorm((n_channels,)),\n            FeedForward(n_channels, channel_dim, dropout),\n        )\n\n    def construct(self, x):\n        x = x + self.token_mix(x)\n        x = x + self.channel_mix(x)\n        return x\n

\u5728nn.Cell\u7c7b\u7684\u7f16\u5199\u8fc7\u7a0b\u4e2d\uff0c\u6709\u4e24\u4e2a\u503c\u5f97\u6ce8\u610f\u7684\u65b9\u9762

  • CellList & SequentialCell

  • CellList is just a container that holds a list of neural network layers (Cell). The Cells it contains are properly registered and are visible to all Cell methods, but the forward computation still has to be written by hand in the construct function.

  • SequentialCell is a container that holds an ordered list of layers (Cell). The Cells may be named (passed as an OrderedDict) or unnamed (passed as a list). The forward computation does not need to be implemented; it is executed in the order of the list. A short sketch contrasting the two containers is given after this list.

  • construct

  • Assert is not supported. [RuntimeError: ParseStatement] Unsupported statement 'Assert'.

  • Usage of single operators\uff1a\u8c03\u7528\u7b97\u5b50\u65f6\uff08\u5982concat, reshape, mean\uff09\uff0c\u4f7f\u7528\u51fd\u6570\u5f0f\u63a5\u53e3 mindspore.ops.functional (\u5982 output=ops.concat((x1, x2)))\uff0c\u907f\u514d\u5148\u5728__init__\u4e2d\u5b9e\u4f8b\u5316\u539f\u59cb\u7b97\u5b50 ops.Primitive (\u5982self.concat=ops.Concat()) \u518d\u5728construct\u4e2d\u8c03\u7528\uff08output=self.concat((x1, x2))\uff09\u3002\u793a\u4f8b\u89c1\u4e0b\u65b9\u4ee3\u7801\u3002

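The following is a minimal sketch, not part of the original guide, contrasting the two containers and the functional-operator style described above. It assumes the usual import mindspore.nn as nn / import mindspore.ops as ops imports; the class names and layer sizes are illustrative only.

class SequentialExample(nn.Cell):\n    # SequentialCell runs its layers in the given order, so construct only needs to call the container.\n    def __init__(self) -> None:\n        super().__init__()\n        self.body = nn.SequentialCell(\n            nn.Dense(16, 32),\n            nn.ReLU(),\n            nn.Dense(32, 16),\n        )\n\n    def construct(self, x):\n        return self.body(x)\n\n\nclass CellListExample(nn.Cell):\n    # CellList only registers the layers; the forward computation must be written by hand.\n    def __init__(self, depth: int = 3) -> None:\n        super().__init__()\n        self.blocks = nn.CellList([nn.Dense(16, 16) for _ in range(depth)])\n\n    def construct(self, x):\n        for block in self.blocks:\n            x = block(x)\n        # call operators through the functional interface instead of instantiating Primitives in __init__\n        x = ops.concat((x, x), axis=1)\n        return ops.mean(x, 1)\n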
"},{"location":"zh/how_to_guides/write_a_new_model/#_5","title":"\u4e3b\u6a21\u578b","text":"

\u4e3b\u6a21\u578b\u662f\u8bba\u6587\u4e2d\u6240\u63d0\u51fa\u7684\u7f51\u7edc\u6a21\u578b\u5b9a\u4e49\uff0c\u7531\u591a\u4e2a\u5b50\u6a21\u578b\u5806\u53e0\u800c\u6210\u3002\u5b83\u662f\u9002\u7528\u4e8e\u5206\u7c7b\u3001\u68c0\u6d4b\u7b49\u4efb\u52a1\u7684\u6700\u9876\u5c42\u7f51\u7edc\u3002\u5b83\u5728\u4ee3\u7801\u4e66\u5199\u4e0a\u4e0e\u5b50\u6a21\u578b\u4e0a\u57fa\u672c\u7c7b\u4f3c\uff0c\u4f46\u6709\u51e0\u5904\u4e0d\u540c\u3002

  • \u7c7b\u6ce8\u91ca\u3002\u6211\u4eec\u5e94\u5f53\u5728\u6b64\u7ed9\u51fa\u8bba\u6587\u7684\u9898\u76ee\u548c\u94fe\u63a5\u3002\u53e6\u5916\u7531\u4e8e\u8be5\u7c7b\u5bf9\u5916\u66b4\u9732\uff0c\u6211\u4eec\u6700\u597d\u4e5f\u52a0\u4e0a\u7c7b\u521d\u59cb\u5316\u53c2\u6570\u7684\u8bf4\u660e\u3002\u8be6\u89c1\u4e0b\u65b9\u4ee3\u7801\u3002
  • forward_features\u51fd\u6570\u3002\u5728\u51fd\u6570\u5185\u5bf9\u6a21\u578b\u7684\u7279\u5f81\u7f51\u7edc\u7684\u8fd0\u7b97\u5b9a\u4e49\u3002
  • forward_head\u51fd\u6570\u3002\u5728\u51fd\u6570\u5185\u5bf9\u6a21\u578b\u7684\u5206\u7c7b\u5668\u7684\u8fd0\u7b97\u8fdb\u884c\u5b9a\u4e49\u3002
  • construct\u51fd\u6570\u3002\u5728\u51fd\u6570\u8c03\u7528\u7279\u5f81\u7f51\u7edc\u548c\u5206\u7c7b\u5668\u7684\u8fd0\u7b97\u3002
  • _initialize_weights\u51fd\u6570\u3002\u6211\u4eec\u7ea6\u5b9a\u6a21\u578b\u53c2\u6570\u7684\u968f\u673a\u521d\u59cb\u5316\u7531\u8be5\u6210\u5458\u51fd\u6570\u5b8c\u6210\u3002\u8be6\u89c1\u4e0b\u65b9\u4ee3\u7801\u3002

\u793a\u4f8b\u5982\u4e0b\uff1a

class MLPMixer(nn.Cell):\nr\"\"\"MLP-Mixer model class, based on\n    `\"MLP-Mixer: An all-MLP Architecture for Vision\" <https://arxiv.org/abs/2105.01601>`_\n\n    Args:\n        depth (int) : number of MixerBlocks.\n        patch_size (Union[int, tuple]) : size of a single image patch.\n        n_patches (int) : number of patches.\n        n_channels (int) : channels(dimension) of a single embedded patch.\n        token_dim (int) : hidden dim of token-mixing MLP.\n        channel_dim (int) : hidden dim of channel-mixing MLP.\n        in_channels (int) : number of channels of the input. Default: 3.\n        n_classes (int) : number of classification classes. Default: 1000.\n    \"\"\"\n\n    def __init__(self,\n                 depth: int,\n                 patch_size: Union[int, tuple],\n                 n_patches: int,\n                 n_channels: int,\n                 token_dim: int,\n                 channel_dim: int,\n                 in_channels: int = 3,\n                 n_classes: int = 1000,\n                 ) -> None:\n        super().__init__()\n        self.n_patches = n_patches\n        self.n_channels = n_channels\n        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.\n        self.to_patch_embedding = nn.SequentialCell(\n            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode=\"pad\", padding=0),\n            TransPose(permutation=(0, 2, 1), embedding=True),\n        )\n        self.mixer_blocks = nn.SequentialCell()\n        for _ in range(depth):\n            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))\n        self.layer_norm = nn.LayerNorm((n_channels,))\n        self.mlp_head = nn.Dense(n_channels, n_classes)\n        self._initialize_weights()\n\n    def forward_features(self, x: Tensor) -> Tensor:\n        x = self.to_patch_embedding(x)\n        x = self.mixer_blocks(x)\n        x = self.layer_norm(x)\n        return ops.mean(x, 1)\n\n    def forward_head(self, x: Tensor) -> Tensor:\n        return self.mlp_head(x)\n\n    def construct(self, x: Tensor) -> Tensor:\n        x = self.forward_features(x)\n        return self.forward_head(x)\n\n    def _initialize_weights(self) -> None:\n        for name, m in self.cells_and_names():\n            if isinstance(m, nn.Conv2d):\n                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):\n                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))\n                if m.beta is not None:\n                    m.beta.set_data(init.initializer(init.Constant(0.0001), m.beta.shape))\n            elif isinstance(m, nn.Dense):\n                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))\n                if m.bias is not None:\n                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_6","title":"\u89c4\u683c\u51fd\u6570","text":"

\u8bba\u6587\u4e2d\u6240\u63d0\u51fa\u7684\u6a21\u578b\u53ef\u80fd\u6709\u4e0d\u540c\u89c4\u683c\u7684\u53d8\u79cd\uff0c\u5982channel\u7684\u5927\u5c0f\u3001depth\u7684\u5927\u5c0f\u7b49\u7b49\u3002\u8fd9\u4e9b\u53d8\u79cd\u7684\u5177\u4f53\u914d\u7f6e\u5e94\u8be5\u901a\u8fc7\u89c4\u683c\u51fd\u6570\u4f53\u73b0\uff0c\u89c4\u683c\u7684\u63a5\u53e3\u53c2\u6570\uff1a pretrained, num_classes, in_channels \u547d\u540d\u8981\u7edf\u4e00\uff0c\u540c\u65f6\u5728\u89c4\u683c\u51fd\u6570\u5185\u8fd8\u8981\u8fdb\u884cpretrain loading\u64cd\u4f5c\u3002\u6bcf\u4e00\u4e2a\u89c4\u683c\u51fd\u6570\u5bf9\u5e94\u4e00\u79cd\u786e\u5b9a\u914d\u7f6e\u7684\u89c4\u683c\u53d8\u79cd\u3002\u914d\u7f6e\u901a\u8fc7\u5165\u53c2\u4f20\u5165\u4e3b\u6a21\u578b\u7c7b\u7684\u5b9a\u4e49\uff0c\u5e76\u8fd4\u56de\u5b9e\u4f8b\u5316\u7684\u4e3b\u6a21\u578b\u7c7b\u3002\u53e6\u5916\uff0c\u8fd8\u9700\u901a\u8fc7\u6dfb\u52a0\u88c5\u9970\u5668@register_model\u5c06\u8be5\u6a21\u578b\u7684\u6b64\u89c4\u683c\u6ce8\u518c\u5230\u5305\u5185\u3002

\u793a\u4f8b\u5982\u4e0b\uff1a

@register_model\ndef mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,\n                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n\n@register_model\ndef mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):\n    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072\n    _check_resolution_and_length_of_patch(pr, ls)\n    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,\n                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)\n    if pretrained:\n        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)\n    return model\n
"},{"location":"zh/how_to_guides/write_a_new_model/#main","title":"\u9a8c\u8bc1main\uff08\u53ef\u9009\uff09","text":"

\u521d\u59cb\u7f16\u5199\u9636\u6bb5\u5e94\u5f53\u4fdd\u8bc1\u6a21\u578b\u662f\u53ef\u8fd0\u884c\u7684\u3002\u53ef\u901a\u8fc7\u4e0b\u8ff0\u4ee3\u7801\u5757\u8fdb\u884c\u57fa\u7840\u9a8c\u8bc1\uff1a

if __name__ == '__main__':\n    import numpy as np\n    import mindspore\n    from mindspore import Tensor\n\n    model = mlp_mixer_s_p16()\n    print(model)\n    dummy_input = Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)\n    y = model(dummy_input)\n    print(y.shape)\n
"},{"location":"zh/how_to_guides/write_a_new_model/#_7","title":"\u53c2\u8003\u793a\u4f8b","text":"
  • densenet.py
  • shufflenetv1.py
  • shufflenetv2.py
  • mixnet.py
  • mlp_mixer.py
"},{"location":"zh/notes/changelog/","title":"\u66f4\u65b0\u65e5\u5fd7","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/notes/code_of_conduct/","title":"\u884c\u4e3a\u51c6\u5219","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/notes/faq/","title":"\u5e38\u89c1\u95ee\u9898","text":"

\u5373\u5c06\u5230\u6765

"},{"location":"zh/tutorials/configuration/","title":"\u914d\u7f6e","text":"

MindCV\u5957\u4ef6\u53ef\u4ee5\u901a\u8fc7python\u7684argparse\u5e93\u548cPyYAML\u5e93\u89e3\u6790\u6a21\u578b\u7684yaml\u6587\u4ef6\u6765\u8fdb\u884c\u53c2\u6570\u7684\u914d\u7f6e\u3002 \u4e0b\u9762\u6211\u4eec\u4ee5squeezenet_1.0\u6a21\u578b\u4e3a\u4f8b\uff0c\u89e3\u91ca\u5982\u4f55\u914d\u7f6e\u76f8\u5e94\u7684\u53c2\u6570\u3002

"},{"location":"zh/tutorials/configuration/#_2","title":"\u57fa\u7840\u73af\u5883","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • mode\uff1a\u4f7f\u7528\u9759\u6001\u56fe\u6a21\u5f0f\uff080\uff09\u6216\u52a8\u6001\u56fe\u6a21\u5f0f\uff081\uff09\u3002

  • distribute\uff1a\u662f\u5426\u4f7f\u7528\u5206\u5e03\u5f0f\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    mode: 0\ndistribute: True\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py --mode 0 --distribute False ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    args.mode\u4ee3\u8868\u53c2\u6570mode, args.distribute\u4ee3\u8868\u53c2\u6570distribute\u3002

    def train(args):\n    ms.set_context(mode=args.mode)\n\n    if args.distribute:\n        init()\n        device_num = get_group_size()\n        rank_id = get_rank()\n        ms.set_auto_parallel_context(device_num=device_num,\n                                     parallel_mode='data_parallel',\n                                     gradients_mean=True)\n    else:\n        device_num = None\n        rank_id = None\n    ...\n
"},{"location":"zh/tutorials/configuration/#_3","title":"\u6570\u636e\u96c6","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • dataset\uff1a\u6570\u636e\u96c6\u540d\u79f0\u3002

  • data_dir\uff1a\u6570\u636e\u96c6\u6587\u4ef6\u6240\u5728\u8def\u5f84\u3002

  • shuffle\uff1a\u662f\u5426\u8fdb\u884c\u6570\u636e\u6df7\u6d17\u3002

  • dataset_download\uff1a\u662f\u5426\u4e0b\u8f7d\u6570\u636e\u96c6\u3002

  • batch_size\uff1a\u6bcf\u4e2a\u6279\u5904\u7406\u6570\u636e\u5305\u542b\u7684\u6570\u636e\u6761\u76ee\u3002

  • drop_remainder\uff1a\u5f53\u6700\u540e\u4e00\u4e2a\u6279\u5904\u7406\u6570\u636e\u5305\u542b\u7684\u6570\u636e\u6761\u76ee\u5c0f\u4e8e batch_size \u65f6\uff0c\u662f\u5426\u5c06\u8be5\u6279\u5904\u7406\u4e22\u5f03\u3002

  • num_parallel_workers\uff1a\u8bfb\u53d6\u6570\u636e\u7684\u5de5\u4f5c\u7ebf\u7a0b\u6570\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    dataset: 'imagenet'\ndata_dir: './imagenet2012'\nshuffle: True\ndataset_download: False\nbatch_size: 32\ndrop_remainder: True\nnum_parallel_workers: 8\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --dataset imagenet --data_dir ./imagenet2012 --shuffle True \\\n--dataset_download False --batch_size 32 --drop_remainder True \\\n--num_parallel_workers 8 ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    dataset_train = create_dataset(\n        name=args.dataset,\n        root=args.data_dir,\n        split='train',\n        shuffle=args.shuffle,\n        num_samples=args.num_samples,\n        num_shards=device_num,\n        shard_id=rank_id,\n        num_parallel_workers=args.num_parallel_workers,\n        download=args.dataset_download,\n        num_aug_repeats=args.aug_repeats)\n\n    ...\n    target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None\n\n    loader_train = create_loader(\n        dataset=dataset_train,\n        batch_size=args.batch_size,\n        drop_remainder=args.drop_remainder,\n        is_training=True,\n        mixup=args.mixup,\n        cutmix=args.cutmix,\n        cutmix_prob=args.cutmix_prob,\n        num_classes=args.num_classes,\n        transform=transform_list,\n        target_transform=target_transform,\n        num_parallel_workers=args.num_parallel_workers,\n    )\n    ...\n
"},{"location":"zh/tutorials/configuration/#_4","title":"\u6570\u636e\u589e\u5f3a","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • image_resize\uff1a\u56fe\u50cf\u7684\u8f93\u51fa\u5c3a\u5bf8\u5927\u5c0f\u3002

  • scale\uff1a\u8981\u88c1\u526a\u7684\u539f\u59cb\u5c3a\u5bf8\u5927\u5c0f\u7684\u5404\u4e2a\u5c3a\u5bf8\u7684\u8303\u56f4\u3002

  • ratio\uff1a\u88c1\u526a\u5bbd\u9ad8\u6bd4\u7684\u8303\u56f4\u3002

  • hflip\uff1a\u56fe\u50cf\u88ab\u6c34\u5e73\u7ffb\u8f6c\u7684\u6982\u7387\u3002

  • interpolation\uff1a\u56fe\u50cf\u63d2\u503c\u65b9\u5f0f\u3002

  • crop_pct\uff1a\u8f93\u5165\u56fe\u50cf\u4e2d\u5fc3\u88c1\u526a\u767e\u5206\u6bd4\u3002

  • color_jitter\uff1a\u989c\u8272\u6296\u52a8\u56e0\u5b50\uff08\u4eae\u5ea6\u8c03\u6574\u56e0\u5b50\uff0c\u5bf9\u6bd4\u5ea6\u8c03\u6574\u56e0\u5b50\uff0c\u9971\u548c\u5ea6\u8c03\u6574\u56e0\u5b50\uff09\u3002

  • re_prob\uff1a\u6267\u884c\u968f\u673a\u64e6\u9664\u7684\u6982\u7387\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    image_resize: 224\nscale: [0.08, 1.0]\nratio: [0.75, 1.333]\nhflip: 0.5\ninterpolation: 'bilinear'\ncrop_pct: 0.875\ncolor_jitter: [0.4, 0.4, 0.4]\nre_prob: 0.5\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --image_resize 224 --scale [0.08, 1.0] --ratio [0.75, 1.333] \\\n--hflip 0.5 --interpolation \"bilinear\" --crop_pct 0.875 \\\n--color_jitter [0.4, 0.4, 0.4] --re_prob 0.5 ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    transform_list = create_transforms(\n        dataset_name=args.dataset,\n        is_training=True,\n        image_resize=args.image_resize,\n        scale=args.scale,\n        ratio=args.ratio,\n        hflip=args.hflip,\n        vflip=args.vflip,\n        color_jitter=args.color_jitter,\n        interpolation=args.interpolation,\n        auto_augment=args.auto_augment,\n        mean=args.mean,\n        std=args.std,\n        re_prob=args.re_prob,\n        re_scale=args.re_scale,\n        re_ratio=args.re_ratio,\n        re_value=args.re_value,\n        re_max_attempts=args.re_max_attempts\n    )\n    ...\n
"},{"location":"zh/tutorials/configuration/#_5","title":"\u6a21\u578b","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • model\uff1a\u6a21\u578b\u540d\u79f0\u3002

  • num_classes\uff1a\u5206\u7c7b\u7684\u7c7b\u522b\u6570\u3002

  • pretrained\uff1a\u662f\u5426\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b\u3002

  • ckpt_path\uff1a\u53c2\u6570\u6587\u4ef6\u6240\u5728\u7684\u8def\u5f84\u3002

  • keep_checkpoint_max\uff1a\u6700\u591a\u4fdd\u5b58\u591a\u5c11\u4e2acheckpoint\u6587\u4ef6\u3002

  • ckpt_save_dir\uff1a\u4fdd\u5b58\u53c2\u6570\u6587\u4ef6\u7684\u8def\u5f84\u3002

  • epoch_size\uff1a\u8bad\u7ec3\u6267\u884c\u8f6e\u6b21\u3002

  • dataset_sink_mode\uff1a\u6570\u636e\u662f\u5426\u76f4\u63a5\u4e0b\u6c89\u81f3\u5904\u7406\u5668\u8fdb\u884c\u5904\u7406\u3002

  • amp_level\uff1a\u6df7\u5408\u7cbe\u5ea6\u7b49\u7ea7\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    model: 'squeezenet1_0'\nnum_classes: 1000\npretrained: False\nckpt_path: './squeezenet1_0_gpu.ckpt'\nkeep_checkpoint_max: 10\nckpt_save_dir: './ckpt/'\nepoch_size: 200\ndataset_sink_mode: True\namp_level: 'O0'\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --model squeezenet1_0 --num_classes 1000 --pretrained False \\\n--ckpt_path ./squeezenet1_0_gpu.ckpt --keep_checkpoint_max 10 \\\n--ckpt_save_dir ./ckpt/ --epoch_size 200 --dataset_sink_mode True \\\n--amp_level O0 ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    network = create_model(model_name=args.model,\n        num_classes=args.num_classes,\n        in_channels=args.in_channels,\n        drop_rate=args.drop_rate,\n        drop_path_rate=args.drop_path_rate,\n        pretrained=args.pretrained,\n        checkpoint_path=args.ckpt_path,\n        ema=args.ema\n    )\n    ...\n
"},{"location":"zh/tutorials/configuration/#_6","title":"\u635f\u5931\u51fd\u6570","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • loss\uff1a\u635f\u5931\u51fd\u6570\u7684\u7b80\u79f0\u3002

  • label_smoothing\uff1a\u6807\u7b7e\u5e73\u6ed1\u503c\uff0c\u7528\u4e8e\u8ba1\u7b97Loss\u65f6\u9632\u6b62\u6a21\u578b\u8fc7\u62df\u5408\u7684\u6b63\u5219\u5316\u624b\u6bb5\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    loss: 'CE'\nlabel_smoothing: 0.1\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --loss CE --label_smoothing 0.1 ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    loss = create_loss(name=args.loss,\n        reduction=args.reduction,\n        label_smoothing=args.label_smoothing,\n        aux_factor=args.aux_factor\n    )\n    ...\n
"},{"location":"zh/tutorials/configuration/#_7","title":"\u5b66\u4e60\u7387\u7b56\u7565","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • scheduler\uff1a\u5b66\u4e60\u7387\u7b56\u7565\u7684\u540d\u79f0\u3002

  • min_lr\uff1a\u5b66\u4e60\u7387\u7684\u6700\u5c0f\u503c\u3002

  • lr\uff1a\u5b66\u4e60\u7387\u7684\u6700\u5927\u503c\u3002

  • warmup_epochs\uff1a\u5b66\u4e60\u7387warmup\u7684\u8f6e\u6b21\u3002

  • decay_epochs\uff1a\u8fdb\u884c\u8870\u51cf\u7684step\u6570\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    scheduler: 'cosine_decay'\nmin_lr: 0.0\nlr: 0.01\nwarmup_epochs: 0\ndecay_epochs: 200\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --scheduler cosine_decay --min_lr 0.0 --lr 0.01 \\\n--warmup_epochs 0 --decay_epochs 200 ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    lr_scheduler = create_scheduler(num_batches,\n        scheduler=args.scheduler,\n        lr=args.lr,\n        min_lr=args.min_lr,\n        warmup_epochs=args.warmup_epochs,\n        warmup_factor=args.warmup_factor,\n        decay_epochs=args.decay_epochs,\n        decay_rate=args.decay_rate,\n        milestones=args.multi_step_decay_milestones,\n        num_epochs=args.epoch_size,\n        lr_epoch_stair=args.lr_epoch_stair\n    )\n    ...\n
"},{"location":"zh/tutorials/configuration/#_8","title":"\u4f18\u5316\u5668","text":"
  1. \u53c2\u6570\u8bf4\u660e
  • opt\uff1a\u4f18\u5316\u5668\u540d\u79f0\u3002

  • filter_bias_and_bn\uff1a\u662f\u5426\u5c06bias\u3001gamma\u3001beta\u7b49\u53c2\u6570\u6392\u9664\u5728weight decay\u4e4b\u5916\u3002

  • momentum\uff1a\u79fb\u52a8\u5e73\u5747\u7684\u52a8\u91cf\u3002

  • weight_decay\uff1a\u6743\u91cd\u8870\u51cf\uff08L2 penalty\uff09\u3002

  • loss_scale\uff1a\u68af\u5ea6\u7f29\u653e\u7cfb\u6570\u3002

  • use_nesterov\uff1a\u662f\u5426\u4f7f\u7528Nesterov Accelerated Gradient (NAG)\u7b97\u6cd5\u66f4\u65b0\u68af\u5ea6\u3002

  1. yaml\u6587\u4ef6\u6837\u4f8b

    opt: 'momentum'\nfilter_bias_and_bn: True\nmomentum: 0.9\nweight_decay: 0.00007\nloss_scale: 1024\nuse_nesterov: False\n...\n
  2. parse\u53c2\u6570\u8bbe\u7f6e

    python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \\\n--loss_scale 1024 --use_nesterov False ...\n
  3. \u5bf9\u5e94\u7684\u4ee3\u7801\u793a\u4f8b

    def train(args):\n    ...\n    if args.ema:\n        optimizer = create_optimizer(network.trainable_params(),\n            opt=args.opt,\n            lr=lr_scheduler,\n            weight_decay=args.weight_decay,\n            momentum=args.momentum,\n            nesterov=args.use_nesterov,\n            filter_bias_and_bn=args.filter_bias_and_bn,\n            loss_scale=args.loss_scale,\n            checkpoint_path=opt_ckpt_path,\n            eps=args.eps\n        )\n    else:\n        optimizer = create_optimizer(network.trainable_params(),\n            opt=args.opt,\n            lr=lr_scheduler,\n            weight_decay=args.weight_decay,\n            momentum=args.momentum,\n            nesterov=args.use_nesterov,\n            filter_bias_and_bn=args.filter_bias_and_bn,\n            checkpoint_path=opt_ckpt_path,\n            eps=args.eps\n        )\n    ...\n
"},{"location":"zh/tutorials/configuration/#yamlparse","title":"Yaml\u548cParse\u7ec4\u5408\u4f7f\u7528","text":"

\u4f7f\u7528parse\u8bbe\u7f6e\u53c2\u6570\u53ef\u4ee5\u8986\u76d6yaml\u6587\u4ef6\u4e2d\u7684\u53c2\u6570\u8bbe\u7f6e\u3002\u4ee5\u4e0b\u9762\u7684shell\u547d\u4ee4\u4e3a\u4f8b\uff0c

python train.py -c ./configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir ./data\n

\u4e0a\u9762\u7684\u547d\u4ee4\u5c06args.data_dir\u53c2\u6570\u7684\u503c\u7531yaml\u6587\u4ef6\u4e2d\u7684 ./imagenet2012 \u8986\u76d6\u4e3a ./data\u3002

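The snippet below is a simplified, illustrative sketch of how this yaml-plus-argparse override behaviour can be implemented with a two-pass argparse pattern; it is not the exact content of MindCV's config parsing, and the argument set is reduced for brevity.

import argparse\n\nimport yaml\n\n\ndef parse_args():\n    # first pass: only read the -c/--config option\n    parser = argparse.ArgumentParser(description='train config (sketch)')\n    parser.add_argument('-c', '--config', type=str, default='', help='path to a yaml config file')\n    parser.add_argument('--data_dir', type=str, default='./imagenet2012')\n    parser.add_argument('--mode', type=int, default=0)\n    args_config, _ = parser.parse_known_args()\n\n    # values from the yaml file become the new defaults ...\n    if args_config.config:\n        with open(args_config.config) as f:\n            cfg = yaml.safe_load(f)\n        parser.set_defaults(**cfg)\n\n    # ... so flags given on the command line still take precedence over the yaml file\n    return parser.parse_args()\n\n\nif __name__ == '__main__':\n    print(parse_args())\n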
"},{"location":"zh/tutorials/deployment/","title":"\u90e8\u7f72\u63a8\u7406\u670d\u52a1","text":"

MindSpore Serving\u662f\u4e00\u4e2a\u8f7b\u91cf\u7ea7\u3001\u9ad8\u6027\u80fd\u7684\u63a8\u7406\u670d\u52a1\u6a21\u5757\uff0c\u65e8\u5728\u5e2e\u52a9MindSpore\u5f00\u53d1\u8005\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u9ad8\u6548\u90e8\u7f72\u5728\u7ebf\u63a8\u7406\u670d\u52a1\u3002\u5f53\u7528\u6237\u4f7f\u7528MindSpore\u5b8c\u6210\u6a21\u578b\u8bad\u7ec3\u540e\uff0c\u5bfc\u51faMindSpore\u6a21\u578b\uff0c\u5373\u53ef\u4f7f\u7528MindSpore Serving\u521b\u5efa\u8be5\u6a21\u578b\u7684\u63a8\u7406\u670d\u52a1\u3002

\u672c\u6587\u4ee5mobilenet_v2_100\u7f51\u7edc\u4e3a\u4f8b\uff0c\u6f14\u793a\u57fa\u4e8eMindSpore Serving\u8fdb\u884c\u90e8\u7f72\u63a8\u7406\u670d\u52a1\u7684\u65b9\u6cd5\u3002

"},{"location":"zh/tutorials/deployment/#_2","title":"\u73af\u5883\u51c6\u5907","text":"

\u8fdb\u884c\u90e8\u7f72\u524d\uff0c\u9700\u786e\u4fdd\u5df2\u7ecf\u6b63\u786e\u5b89\u88c5\u4e86MindSpore Serving\uff0c\u5e76\u914d\u7f6e\u4e86\u73af\u5883\u53d8\u91cf\u3002MindSpore Serving\u5b89\u88c5\u548c\u914d\u7f6e\u53ef\u4ee5\u53c2\u8003MindSpore Serving\u5b89\u88c5\u9875\u9762 \u3002

"},{"location":"zh/tutorials/deployment/#_3","title":"\u6a21\u578b\u5bfc\u51fa","text":"

\u5b9e\u73b0\u8de8\u5e73\u53f0\u6216\u786c\u4ef6\u6267\u884c\u63a8\u7406\uff08\u5982\u6607\u817eAI\u5904\u7406\u5668\u3001MindSpore\u7aef\u4fa7\u3001GPU\u7b49\uff09\uff0c\u9700\u8981\u901a\u8fc7\u7f51\u7edc\u5b9a\u4e49\u548cCheckPoint\u751f\u6210MindIR\u683c\u5f0f\u6a21\u578b\u6587\u4ef6\u3002\u5728MindSpore\u4e2d\uff0c\u7f51\u7edc\u6a21\u578b\u5bfc\u51fa\u7684\u51fd\u6570\u4e3aexport\uff0c\u4e3b\u8981\u53c2\u6570\u5982\u4e0b\u6240\u793a\uff1a

  • net\uff1aMindSpore\u7f51\u7edc\u7ed3\u6784\u3002
  • inputs\uff1a\u7f51\u7edc\u7684\u8f93\u5165\uff0c\u652f\u6301\u8f93\u5165\u7c7b\u578b\u4e3aTensor\u3002\u5f53\u8f93\u5165\u6709\u591a\u4e2a\u65f6\uff0c\u9700\u8981\u4e00\u8d77\u4f20\u5165\uff0c\u5982ms.export(network, ms.Tensor(input1), ms.Tensor(input2), file_name='network', file_format='MINDIR')\u3002
  • file_name\uff1a\u5bfc\u51fa\u6a21\u578b\u7684\u6587\u4ef6\u540d\u79f0\uff0c\u5982\u679cfile_name\u6ca1\u6709\u5305\u542b\u5bf9\u5e94\u7684\u540e\u7f00\u540d(\u5982.mindir)\uff0c\u8bbe\u7f6efile_format\u540e\u7cfb\u7edf\u4f1a\u4e3a\u6587\u4ef6\u540d\u81ea\u52a8\u6dfb\u52a0\u540e\u7f00\u3002
  • file_format\uff1aMindSpore\u76ee\u524d\u652f\u6301\u5bfc\u51fa\u201cAIR\u201d\uff0c\u201cONNX\u201d\u548c\u201cMINDIR\u201d\u683c\u5f0f\u7684\u6a21\u578b\u3002

\u4e0b\u9762\u4ee3\u7801\u4ee5mobilenet_v2_100\u4e3a\u4f8b\uff0c\u5bfc\u51faMindCV\u7684\u9884\u8bad\u7ec3\u7f51\u7edc\u6a21\u578b\uff0c\u83b7\u5f97MindIR\u683c\u5f0f\u6a21\u578b\u6587\u4ef6\u3002

from mindcv.models import create_model\nimport numpy as np\nimport mindspore as ms\n\nmodel = create_model(model_name='mobilenet_v2_100', num_classes=1000, pretrained=True)\n\ninput_np = np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]).astype(np.float32)\n\n# \u5bfc\u51fa\u6587\u4ef6mobilenet_v2_100.mindir\u5230\u5f53\u524d\u6587\u4ef6\u5939\nms.export(model, ms.Tensor(input_np), file_name='mobilenet_v2_100', file_format='MINDIR')\n
"},{"location":"zh/tutorials/deployment/#serving","title":"\u90e8\u7f72Serving\u63a8\u7406\u670d\u52a1","text":""},{"location":"zh/tutorials/deployment/#_4","title":"\u914d\u7f6e\u670d\u52a1","text":"

\u542f\u52a8Serving\u670d\u52a1\uff0c\u6267\u884c\u672c\u6559\u7a0b\u9700\u8981\u5982\u4e0b\u6587\u4ef6\u5217\u8868:

demo\n\u251c\u2500\u2500 mobilenet_v2_100\n\u2502   \u251c\u2500\u2500 1\n\u2502   \u2502   \u2514\u2500\u2500 mobilenet_v2_100.mindir\n\u2502   \u2514\u2500\u2500 servable_config.py\n\u2502\u2500\u2500 serving_server.py\n\u251c\u2500\u2500 serving_client.py\n\u251c\u2500\u2500 imagenet1000_clsidx_to_labels.txt\n\u2514\u2500\u2500 test_image\n    \u251c\u2500 dog\n    \u2502   \u251c\u2500 dog.jpg\n    \u2502   \u2514\u2500 \u2026\u2026\n    \u2514\u2500 \u2026\u2026\n
  • mobilenet_v2_100\u4e3a\u6a21\u578b\u6587\u4ef6\u5939\uff0c\u6587\u4ef6\u5939\u540d\u5373\u4e3a\u6a21\u578b\u540d\u3002
  • mobilenet_v2_100.mindir\u4e3a\u4e0a\u4e00\u6b65\u7f51\u7edc\u751f\u6210\u7684\u6a21\u578b\u6587\u4ef6\uff0c\u653e\u7f6e\u5728\u6587\u4ef6\u59391\u4e0b\uff0c1\u4e3a\u7248\u672c\u53f7\uff0c\u4e0d\u540c\u7684\u7248\u672c\u653e\u7f6e\u5728\u4e0d\u540c\u7684\u6587\u4ef6\u5939\u4e0b\uff0c\u7248\u672c\u53f7\u9700\u4ee5\u7eaf\u6570\u5b57\u4e32\u547d\u540d\uff0c\u9ed8\u8ba4\u914d\u7f6e\u4e0b\u542f\u52a8\u6700\u5927\u6570\u503c\u7684\u7248\u672c\u53f7\u7684\u6a21\u578b\u6587\u4ef6\u3002
  • servable_config.py\u4e3a\u6a21\u578b\u914d\u7f6e\u811a\u672c\uff0c\u5bf9\u6a21\u578b\u8fdb\u884c\u58f0\u660e\u3001\u5165\u53c2\u548c\u51fa\u53c2\u5b9a\u4e49\u3002
  • serving_server.py\u4e3a\u542f\u52a8\u670d\u52a1\u811a\u672c\u6587\u4ef6\u3002
  • serving_client.py\u4e3a\u542f\u52a8\u5ba2\u6237\u7aef\u811a\u672c\u6587\u4ef6\u3002
  • imagenet1000_clsidx_to_labels.txt\u4e3aImageNet\u6570\u636e\u96c61000\u4e2a\u7c7b\u522b\u7684\u7d22\u5f15\uff0c\u53ef\u4ee5\u5728examples/data/\u4e2d\u5f97\u5230\u3002
  • test_image\u4e2d\u4e3a\u6d4b\u8bd5\u56fe\u7247\uff0c\u53ef\u4ee5\u5728README\u4e2d\u5f97\u5230\u3002

\u5176\u4e2d\uff0c\u6a21\u578b\u914d\u7f6e\u6587\u4ef6servable_config.py\u5185\u5bb9\u5982\u4e0b\uff1a

from mindspore_serving.server import register\n\n# \u8fdb\u884c\u6a21\u578b\u58f0\u660e\uff0c\u5176\u4e2ddeclare_model\u5165\u53c2model_file\u6307\u793a\u6a21\u578b\u7684\u6587\u4ef6\u540d\u79f0\uff0cmodel_format\u6307\u793a\u6a21\u578b\u7684\u6a21\u578b\u7c7b\u522b\nmodel = register.declare_model(model_file=\"mobilenet_v2_100.mindir\", model_format=\"MindIR\")\n\n# Servable\u65b9\u6cd5\u7684\u5165\u53c2\u7531Python\u65b9\u6cd5\u7684\u5165\u53c2\u6307\u5b9a\uff0cServable\u65b9\u6cd5\u7684\u51fa\u53c2\u7531register_method\u7684output_names\u6307\u5b9a\n@register.register_method(output_names=[\"score\"])\ndef predict(image):\n    x = register.add_stage(model, image, outputs_count=1)\n    return x\n
"},{"location":"zh/tutorials/deployment/#_5","title":"\u542f\u52a8\u670d\u52a1","text":"

MindSpore\u7684server\u51fd\u6570\u63d0\u4f9b\u4e24\u79cd\u670d\u52a1\u90e8\u7f72\uff0c\u4e00\u79cd\u662fgRPC\u65b9\u5f0f\uff0c\u4e00\u79cd\u662f\u901a\u8fc7RESTful\u65b9\u5f0f\uff0c\u672c\u6559\u7a0b\u4ee5gRPC\u65b9\u5f0f\u4e3a\u4f8b\u3002\u670d\u52a1\u542f\u52a8\u811a\u672cserving_server.py\u628a\u672c\u5730\u76ee\u5f55\u4e0b\u7684mobilenet_v2_100\u90e8\u7f72\u5230\u8bbe\u59070\uff0c\u5e76\u542f\u52a8\u5730\u5740\u4e3a127.0.0.1:5500\u7684gRPC\u670d\u52a1\u5668\u3002\u811a\u672c\u6587\u4ef6\u5185\u5bb9\u5982\u4e0b\uff1a

import os\nimport sys\nfrom mindspore_serving import server\n\ndef start():\n    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))\n\n    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name=\"mobilenet_v2_100\",\n                                                 device_ids=0)\n    server.start_servables(servable_configs=servable_config)\n    server.start_grpc_server(address=\"127.0.0.1:5500\")\n\nif __name__ == \"__main__\":\n    start()\n

\u5f53\u670d\u52a1\u7aef\u6253\u5370\u5982\u4e0b\u65e5\u5fd7\u65f6\uff0c\u8868\u793aServing gRPC\u670d\u52a1\u542f\u52a8\u6210\u529f\u3002

Serving gRPC server start success, listening on 127.0.0.1:5500\n
"},{"location":"zh/tutorials/deployment/#_6","title":"\u6267\u884c\u63a8\u7406","text":"

\u4f7f\u7528serving_client.py\uff0c\u542f\u52a8Python\u5ba2\u6237\u7aef\u3002\u5ba2\u6237\u7aef\u811a\u672c\u4f7f\u7528mindcv.data\u7684create_transforms, create_dataset\u548ccreate_loader\u51fd\u6570\uff0c\u8fdb\u884c\u56fe\u7247\u9884\u5904\u7406\uff0c\u518d\u4f20\u9001\u7ed9Serving\u670d\u52a1\u5668\u3002\u5bf9\u670d\u52a1\u5668\u8fd4\u56de\u7684\u7ed3\u679c\u8fdb\u884c\u540e\u5904\u7406\uff0c\u6253\u5370\u56fe\u7247\u7684\u9884\u6d4b\u6807\u7b7e\u3002

import os\nfrom mindspore_serving.client import Client\nimport numpy as np\nfrom mindcv.data import create_transforms, create_dataset, create_loader\n\nnum_workers = 1\n\n# \u6570\u636e\u96c6\u76ee\u5f55\u8def\u5f84\ndata_dir = \"./test_image/\"\n\ndataset = create_dataset(root=data_dir, split='', num_parallel_workers=num_workers)\ntransforms_list = create_transforms(dataset_name='ImageNet', is_training=False)\ndata_loader = create_loader(\n    dataset=dataset,\n    batch_size=1,\n    is_training=False,\n    num_classes=1000,\n    transform=transforms_list,\n    num_parallel_workers=num_workers\n)\nwith open(\"imagenet1000_clsidx_to_labels.txt\") as f:\n    idx2label = eval(f.read())\n\ndef postprocess(score):\n    max_idx = np.argmax(score)\n    return idx2label[max_idx]\n\ndef predict():\n    client = Client(\"127.0.0.1:5500\", \"mobilenet_v2_100\", \"predict\")\n    instances = []\n    images, _ = next(data_loader.create_tuple_iterator())\n    image_np = images.asnumpy().squeeze()\n    instances.append({\"image\": image_np})\n    result = client.infer(instances)\n\n    for instance in result:\n        label = postprocess(instance[\"score\"])\n        print(label)\n\nif __name__ == '__main__':\n    predict()\n

\u6267\u884c\u540e\u663e\u793a\u5982\u4e0b\u8fd4\u56de\u503c\uff0c\u8bf4\u660eServing\u670d\u52a1\u5df2\u6b63\u786e\u6267\u884cmobilenet_v2_100\u7f51\u7edc\u6a21\u578b\u7684\u63a8\u7406\u3002

Labrador retriever\n

"},{"location":"zh/tutorials/finetune/","title":"\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u4e0a\u7684\u6a21\u578b\u5fae\u8c03\u8bad\u7ec3","text":"

\u5728\u6b64\u6559\u7a0b\u4e2d\uff0c\u60a8\u5c06\u5b66\u4f1a\u5982\u4f55\u4f7f\u7528MindCV\u5957\u4ef6\u8fdb\u884c\u8fc1\u79fb\u5b66\u4e60\uff0c\u4ee5\u89e3\u51b3\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u4e0a\u7684\u56fe\u50cf\u5206\u7c7b\u95ee\u9898\u3002 \u5728\u6df1\u5ea6\u5b66\u4e60\u4efb\u52a1\u4e2d\uff0c\u5e38\u89c1\u9047\u5230\u8bad\u7ec3\u6570\u636e\u4e0d\u8db3\u7684\u95ee\u9898\uff0c\u6b64\u65f6\u76f4\u63a5\u8bad\u7ec3\u6574\u4e2a\u7f51\u7edc\u5f80\u5f80\u96be\u4ee5\u8fbe\u5230\u7406\u60f3\u7684\u7cbe\u5ea6\u3002 \u4e00\u4e2a\u6bd4\u8f83\u597d\u7684\u505a\u6cd5\u662f\uff0c\u4f7f\u7528\u4e00\u4e2a\u5728\u5927\u89c4\u6a21\u6570\u636e\u96c6\u4e0a(\u4e0e\u4efb\u52a1\u6570\u636e\u8f83\u4e3a\u63a5\u8fd1)\u9884\u8bad\u7ec3\u597d\u7684\u6a21\u578b\uff0c\u7136\u540e\u4f7f\u7528\u8be5\u6a21\u578b\u6765\u521d\u59cb\u5316\u7f51\u7edc\u7684\u6743\u91cd\u53c2\u6570\u6216\u4f5c\u4e3a\u56fa\u5b9a\u7279\u5f81\u63d0\u53d6\u5668\u5e94\u7528\u4e8e\u7279\u5b9a\u7684\u4efb\u52a1\u4e2d\u3002

\u6b64\u6559\u7a0b\u5c06\u4ee5\u4f7f\u7528ImageNet\u4e0a\u9884\u8bad\u7ec3\u7684DenseNet\u6a21\u578b\u4e3a\u4f8b\uff0c\u4ecb\u7ecd\u4e24\u79cd\u4e0d\u540c\u7684\u5fae\u8c03\u7b56\u7565\uff0c\u89e3\u51b3\u5c0f\u6837\u672c\u60c5\u51b5\u4e0b\u72fc\u548c\u72d7\u7684\u56fe\u50cf\u5206\u7c7b\u95ee\u9898:

  1. \u6574\u4f53\u6a21\u578b\u5fae\u8c03\u3002
  2. \u51bb\u7ed3\u7279\u5f81\u7f51\u7edc(freeze backbone)\uff0c\u53ea\u5fae\u8c03\u5206\u7c7b\u5668\u3002

\u8fc1\u79fb\u5b66\u4e60\u8be6\u7ec6\u5185\u5bb9\u89c1Stanford University CS231n

"},{"location":"zh/tutorials/finetune/#_2","title":"\u6570\u636e\u51c6\u5907","text":""},{"location":"zh/tutorials/finetune/#_3","title":"\u4e0b\u8f7d\u6570\u636e\u96c6","text":"

\u4e0b\u8f7d\u6848\u4f8b\u6240\u7528\u5230\u7684\u72d7\u4e0e\u72fc\u5206\u7c7b\u6570\u636e\u96c6\uff0c \u6bcf\u4e2a\u7c7b\u522b\u5404\u6709120\u5f20\u8bad\u7ec3\u56fe\u50cf\u4e0e30\u5f20\u9a8c\u8bc1\u56fe\u50cf\u3002\u4f7f\u7528mindcv.utils.download\u63a5\u53e3\u4e0b\u8f7d\u6570\u636e\u96c6\uff0c\u5e76\u5c06\u4e0b\u8f7d\u540e\u7684\u6570\u636e\u96c6\u81ea\u52a8\u89e3\u538b\u5230\u5f53\u524d\u76ee\u5f55\u4e0b\u3002

import os\nfrom mindcv.utils.download import DownLoad\n\ndataset_url = \"https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip\"\nroot_dir = \"./\"\n\nif not os.path.exists(os.path.join(root_dir, 'data/Canidae')):\n    DownLoad().download_and_extract_archive(dataset_url, root_dir)\n

\u6570\u636e\u96c6\u7684\u76ee\u5f55\u7ed3\u6784\u5982\u4e0b\uff1a

data/\n\u2514\u2500\u2500 Canidae\n    \u251c\u2500\u2500 train\n    \u2502   \u251c\u2500\u2500 dogs\n    \u2502   \u2514\u2500\u2500 wolves\n    \u2514\u2500\u2500 val\n        \u251c\u2500\u2500 dogs\n        \u2514\u2500\u2500 wolves\n
"},{"location":"zh/tutorials/finetune/#_4","title":"\u6570\u636e\u96c6\u52a0\u8f7d\u53ca\u5904\u7406","text":""},{"location":"zh/tutorials/finetune/#_5","title":"\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u7684\u52a0\u8f7d","text":"

\u901a\u8fc7\u8c03\u7528mindcv.data\u4e2d\u7684create_dataset\u51fd\u6570\uff0c\u6211\u4eec\u53ef\u8f7b\u677e\u5730\u52a0\u8f7d\u9884\u8bbe\u7684\u6570\u636e\u96c6\u4ee5\u53ca\u81ea\u5b9a\u4e49\u7684\u6570\u636e\u96c6\u3002

  • \u5f53\u53c2\u6570name\u8bbe\u4e3a\u7a7a\u65f6\uff0c\u6307\u5b9a\u4e3a\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u3002(\u9ed8\u8ba4\u503c)
  • \u5f53\u53c2\u6570name\u8bbe\u4e3aMNIST, CIFAR10\u7b49\u6807\u51c6\u6570\u636e\u96c6\u540d\u79f0\u65f6\uff0c\u6307\u5b9a\u4e3a\u9884\u8bbe\u6570\u636e\u96c6\u3002

\u540c\u65f6\uff0c\u6211\u4eec\u9700\u8981\u8bbe\u5b9a\u6570\u636e\u96c6\u7684\u8def\u5f84data_dir\u548c\u6570\u636e\u5207\u5206\u7684\u540d\u79f0split (\u5982train, val)\uff0c\u4ee5\u52a0\u8f7d\u5bf9\u5e94\u7684\u8bad\u7ec3\u96c6\u6216\u8005\u9a8c\u8bc1\u96c6\u3002

from mindcv.data import create_dataset, create_transforms, create_loader\n\nnum_workers = 8\n\n# \u6570\u636e\u96c6\u76ee\u5f55\u8def\u5f84\ndata_dir = \"./data/Canidae/\"\n\n# \u52a0\u8f7d\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\ndataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)\ndataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)\n

\u6ce8\u610f: \u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u7684\u76ee\u5f55\u7ed3\u6784\u5e94\u4e0eImageNet\u4e00\u6837\uff0c\u5373root -> split -> class -> image \u7684\u5c42\u6b21\u7ed3\u6784

DATASET_NAME\n    \u251c\u2500\u2500 split1(e.g. train)/\n    \u2502  \u251c\u2500\u2500 class1/\n    \u2502  \u2502   \u251c\u2500\u2500 000001.jpg\n    \u2502  \u2502   \u251c\u2500\u2500 000002.jpg\n    \u2502  \u2502   \u2514\u2500\u2500 ....\n    \u2502  \u2514\u2500\u2500 class2/\n    \u2502      \u251c\u2500\u2500 000001.jpg\n    \u2502      \u251c\u2500\u2500 000002.jpg\n    \u2502      \u2514\u2500\u2500 ....\n    \u2514\u2500\u2500 split2/\n       \u251c\u2500\u2500 class1/\n       \u2502   \u251c\u2500\u2500 000001.jpg\n       \u2502   \u251c\u2500\u2500 000002.jpg\n       \u2502   \u2514\u2500\u2500 ....\n       \u2514\u2500\u2500 class2/\n           \u251c\u2500\u2500 000001.jpg\n           \u251c\u2500\u2500 000002.jpg\n           \u2514\u2500\u2500 ....\n
"},{"location":"zh/tutorials/finetune/#_6","title":"\u6570\u636e\u5904\u7406\u53ca\u589e\u5f3a","text":"

\u9996\u5148\u6211\u4eec\u901a\u8fc7\u8c03\u7528create_transforms\u51fd\u6570, \u83b7\u5f97\u9884\u8bbe\u7684\u6570\u636e\u5904\u7406\u548c\u589e\u5f3a\u7b56\u7565(transform list)\uff0c\u6b64\u4efb\u52a1\u4e2d\uff0c\u56e0\u72fc\u72d7\u56fe\u50cf\u548cImageNet\u6570\u636e\u4e00\u81f4\uff08\u5373domain\u4e00\u81f4\uff09\uff0c\u6211\u4eec\u6307\u5b9a\u53c2\u6570dataset_name\u4e3aImageNet\uff0c\u76f4\u63a5\u7528\u9884\u8bbe\u597d\u7684ImageNet\u7684\u6570\u636e\u5904\u7406\u548c\u56fe\u50cf\u589e\u5f3a\u7b56\u7565\u3002create_transforms \u540c\u6837\u652f\u6301\u591a\u79cd\u81ea\u5b9a\u4e49\u7684\u5904\u7406\u548c\u589e\u5f3a\u64cd\u4f5c\uff0c\u4ee5\u53ca\u81ea\u52a8\u589e\u5f3a\u7b56\u7565(AutoAug)\u3002\u8be6\u89c1API\u8bf4\u660e\u3002

\u6211\u4eec\u5c06\u5f97\u5230\u7684transform list\u4f20\u5165create_loader()\uff0c\u5e76\u6307\u5b9abatch_size\u548c\u5176\u4ed6\u53c2\u6570\uff0c\u5373\u53ef\u5b8c\u6210\u8bad\u7ec3\u548c\u9a8c\u8bc1\u6570\u636e\u7684\u51c6\u5907\uff0c\u8fd4\u56deDataset Object\uff0c\u4f5c\u4e3a\u6a21\u578b\u7684\u8f93\u5165\u3002

# \u5b9a\u4e49\u548c\u83b7\u53d6\u6570\u636e\u5904\u7406\u53ca\u589e\u5f3a\u64cd\u4f5c\ntrans_train = create_transforms(dataset_name='ImageNet', is_training=True)\ntrans_val = create_transforms(dataset_name='ImageNet',is_training=False)\n\nloader_train = create_loader(\n    dataset=dataset_train,\n    batch_size=16,\n    is_training=True,\n    num_classes=2,\n    transform=trans_train,\n    num_parallel_workers=num_workers,\n)\nloader_val = create_loader(\n    dataset=dataset_val,\n    batch_size=5,\n    is_training=True,\n    num_classes=2,\n    transform=trans_val,\n    num_parallel_workers=num_workers,\n)\n
"},{"location":"zh/tutorials/finetune/#_7","title":"\u6570\u636e\u96c6\u53ef\u89c6\u5316","text":"

\u5bf9\u4e8ecreate_loader\u63a5\u53e3\u8fd4\u56de\u7684\u5b8c\u6210\u6570\u636e\u52a0\u8f7d\u7684Dataset object\uff0c\u6211\u4eec\u53ef\u4ee5\u901a\u8fc7 create_tuple_iterator \u63a5\u53e3\u521b\u5efa\u6570\u636e\u8fed\u4ee3\u5668\uff0c\u4f7f\u7528 next \u8fed\u4ee3\u8bbf\u95ee\u6570\u636e\u96c6\uff0c\u8bfb\u53d6\u5230\u4e00\u4e2abatch\u7684\u6570\u636e\u3002

images, labels = next(loader_train.create_tuple_iterator())\nprint(\"Tensor of image\", images.shape)\nprint(\"Labels:\", labels)\n
Tensor of image (16, 3, 224, 224)\nLabels: [0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1]\n

\u5bf9\u83b7\u53d6\u5230\u7684\u56fe\u50cf\u53ca\u6807\u7b7e\u6570\u636e\u8fdb\u884c\u53ef\u89c6\u5316\uff0c\u6807\u9898\u4e3a\u56fe\u50cf\u5bf9\u5e94\u7684label\u540d\u79f0\u3002

import matplotlib.pyplot as plt\nimport numpy as np\n\n# class_name\u5bf9\u5e94label\uff0c\u6309\u6587\u4ef6\u5939\u5b57\u7b26\u4e32\u4ece\u5c0f\u5230\u5927\u7684\u987a\u5e8f\u6807\u8bb0label\nclass_name = {0: \"dogs\", 1: \"wolves\"}\n\nplt.figure(figsize=(15, 7))\nfor i in range(len(labels)):\n    # \u83b7\u53d6\u56fe\u50cf\u53ca\u5176\u5bf9\u5e94\u7684label\n    data_image = images[i].asnumpy()\n    data_label = labels[i]\n    # \u5904\u7406\u56fe\u50cf\u4f9b\u5c55\u793a\u4f7f\u7528\n    data_image = np.transpose(data_image, (1, 2, 0))\n    mean = np.array([0.485, 0.456, 0.406])\n    std = np.array([0.229, 0.224, 0.225])\n    data_image = std * data_image + mean\n    data_image = np.clip(data_image, 0, 1)\n    # \u663e\u793a\u56fe\u50cf\n    plt.subplot(3, 6, i + 1)\n    plt.imshow(data_image)\n    plt.title(class_name[int(labels[i].asnumpy())])\n    plt.axis(\"off\")\n\nplt.show()\n

"},{"location":"zh/tutorials/finetune/#_8","title":"\u6a21\u578b\u5fae\u8c03","text":""},{"location":"zh/tutorials/finetune/#1","title":"1. \u6574\u4f53\u6a21\u578b\u5fae\u8c03","text":""},{"location":"zh/tutorials/finetune/#_9","title":"\u9884\u8bad\u7ec3\u6a21\u578b\u52a0\u8f7d","text":"

\u6211\u4eec\u4f7f\u7528mindcv.models.densenet\u4e2d\u5b9a\u4e49DenseNet121\u7f51\u7edc\uff0c\u5f53\u63a5\u53e3\u4e2d\u7684pretrained\u53c2\u6570\u8bbe\u7f6e\u4e3aTrue\u65f6\uff0c\u53ef\u4ee5\u81ea\u52a8\u4e0b\u8f7d\u7f51\u7edc\u6743\u91cd\u3002 \u7531\u4e8e\u8be5\u9884\u8bad\u7ec3\u6a21\u578b\u662f\u9488\u5bf9ImageNet\u6570\u636e\u96c6\u4e2d\u76841000\u4e2a\u7c7b\u522b\u8fdb\u884c\u5206\u7c7b\u7684\uff0c\u8fd9\u91cc\u6211\u4eec\u8bbe\u5b9anum_classes=2, DenseNet\u7684classifier(\u5373\u6700\u540e\u7684FC\u5c42)\u8f93\u51fa\u8c03\u6574\u4e3a\u4e24\u7ef4\uff0c\u6b64\u65f6\u53ea\u52a0\u8f7dbackbone\u7684\u9884\u8bad\u7ec3\u6743\u91cd\uff0c\u800cclassifier\u5219\u4f7f\u7528\u521d\u59cb\u503c\u3002

from mindcv.models import create_model\n\nnetwork = create_model(model_name='densenet121', num_classes=2, pretrained=True)\n

DenseNet\u7684\u5177\u4f53\u7ed3\u6784\u53ef\u53c2\u89c1DenseNet\u8bba\u6587\u3002

"},{"location":"zh/tutorials/finetune/#_10","title":"\u6a21\u578b\u8bad\u7ec3","text":"

\u4f7f\u7528\u5df2\u52a0\u8f7d\u5e76\u5904\u7406\u597d\u7684\u5e26\u6807\u7b7e\u7684\u72fc\u548c\u72d7\u56fe\u50cf\uff0c\u5bf9DenseNet\u7f51\u7edc\u8fdb\u884c\u5fae\u8c03\u3002\u6ce8\u610f\uff0c\u5bf9\u6574\u4f53\u6a21\u578b\u505a\u5fae\u8c03\u65f6\uff0c\u5e94\u4f7f\u7528\u8f83\u5c0f\u7684learning rate\u3002

from mindcv.loss import create_loss\nfrom mindcv.optim import create_optimizer\nfrom mindcv.scheduler import create_scheduler\nfrom mindspore import Model, LossMonitor, TimeMonitor\n\n# \u5b9a\u4e49\u4f18\u5316\u5668\u548c\u635f\u5931\u51fd\u6570\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-4)\nloss = create_loss(name='CE')\n\n# \u5b9e\u4f8b\u5316\u6a21\u578b\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\nmodel.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)\n
epoch: 1 step: 5, loss is 0.5195528864860535\nepoch: 1 step: 10, loss is 0.2654373049736023\nepoch: 1 step: 15, loss is 0.28758567571640015\nTrain epoch time: 17270.144 ms, per step time: 1151.343 ms\nepoch: 2 step: 5, loss is 0.1807008981704712\nepoch: 2 step: 10, loss is 0.1700802594423294\nepoch: 2 step: 15, loss is 0.09752683341503143\nTrain epoch time: 1372.549 ms, per step time: 91.503 ms\nepoch: 3 step: 5, loss is 0.13594701886177063\nepoch: 3 step: 10, loss is 0.03628234937787056\nepoch: 3 step: 15, loss is 0.039737217128276825\nTrain epoch time: 1453.237 ms, per step time: 96.882 ms\nepoch: 4 step: 5, loss is 0.014213413000106812\nepoch: 4 step: 10, loss is 0.030747078359127045\nepoch: 4 step: 15, loss is 0.0798817127943039\nTrain epoch time: 1331.237 ms, per step time: 88.749 ms\nepoch: 5 step: 5, loss is 0.009510636329650879\nepoch: 5 step: 10, loss is 0.02603740245103836\nepoch: 5 step: 15, loss is 0.051846928894519806\nTrain epoch time: 1312.737 ms, per step time: 87.516 ms\nepoch: 6 step: 5, loss is 0.1163717582821846\nepoch: 6 step: 10, loss is 0.02439398318529129\nepoch: 6 step: 15, loss is 0.02564268559217453\nTrain epoch time: 1434.704 ms, per step time: 95.647 ms\nepoch: 7 step: 5, loss is 0.013310655951499939\nepoch: 7 step: 10, loss is 0.02289542555809021\nepoch: 7 step: 15, loss is 0.1992517113685608\nTrain epoch time: 1275.935 ms, per step time: 85.062 ms\nepoch: 8 step: 5, loss is 0.015928998589515686\nepoch: 8 step: 10, loss is 0.011409260332584381\nepoch: 8 step: 15, loss is 0.008141174912452698\nTrain epoch time: 1323.102 ms, per step time: 88.207 ms\nepoch: 9 step: 5, loss is 0.10395607352256775\nepoch: 9 step: 10, loss is 0.23055407404899597\nepoch: 9 step: 15, loss is 0.04896317049860954\nTrain epoch time: 1261.067 ms, per step time: 84.071 ms\nepoch: 10 step: 5, loss is 0.03162381425499916\nepoch: 10 step: 10, loss is 0.13094250857830048\nepoch: 10 step: 15, loss is 0.020028553903102875\nTrain epoch time: 1217.958 ms, per step time: 81.197 ms\n
"},{"location":"zh/tutorials/finetune/#_11","title":"\u6a21\u578b\u8bc4\u4f30","text":"

\u5728\u8bad\u7ec3\u5b8c\u6210\u540e\uff0c\u6211\u4eec\u5728\u9a8c\u8bc1\u96c6\u4e0a\u8bc4\u4f30\u6a21\u578b\u7684\u7cbe\u5ea6\u3002

res = model.eval(loader_val)\nprint(res)\n
{'accuracy': 1.0}\n
"},{"location":"zh/tutorials/finetune/#_12","title":"\u53ef\u89c6\u5316\u6a21\u578b\u63a8\u7406\u7ed3\u679c","text":"

\u5b9a\u4e49 visualize_model \u51fd\u6570\uff0c\u53ef\u89c6\u5316\u6a21\u578b\u9884\u6d4b\u3002

import matplotlib.pyplot as plt\nimport mindspore as ms\n\ndef visualize_model(model, val_dl, num_classes=2):\n    # \u52a0\u8f7d\u9a8c\u8bc1\u96c6\u7684\u6570\u636e\u8fdb\u884c\u9a8c\u8bc1\n    images, labels= next(val_dl.create_tuple_iterator())\n    # \u9884\u6d4b\u56fe\u50cf\u7c7b\u522b\n    output = model.predict(images)\n    pred = np.argmax(output.asnumpy(), axis=1)\n    # \u663e\u793a\u56fe\u50cf\u53ca\u56fe\u50cf\u7684\u9884\u6d4b\u503c\n    images = images.asnumpy()\n    labels = labels.asnumpy()\n    class_name = {0: \"dogs\", 1: \"wolves\"}\n    plt.figure(figsize=(15, 7))\n    for i in range(len(labels)):\n        plt.subplot(3, 6, i + 1)\n        # \u82e5\u9884\u6d4b\u6b63\u786e\uff0c\u663e\u793a\u4e3a\u84dd\u8272\uff1b\u82e5\u9884\u6d4b\u9519\u8bef\uff0c\u663e\u793a\u4e3a\u7ea2\u8272\n        color = 'blue' if pred[i] == labels[i] else 'red'\n        plt.title('predict:{}'.format(class_name[pred[i]]), color=color)\n        picture_show = np.transpose(images[i], (1, 2, 0))\n        mean = np.array([0.485, 0.456, 0.406])\n        std = np.array([0.229, 0.224, 0.225])\n        picture_show = std * picture_show + mean\n        picture_show = np.clip(picture_show, 0, 1)\n        plt.imshow(picture_show)\n        plt.axis('off')\n\n    plt.show()\n

\u4f7f\u7528\u5fae\u8c03\u8fc7\u540e\u7684\u6a21\u578b\u5bf9\u9a8c\u8bc1\u96c6\u7684\u72fc\u548c\u72d7\u56fe\u50cf\u6570\u636e\u8fdb\u884c\u9884\u6d4b\u3002\u82e5\u9884\u6d4b\u5b57\u4f53\u4e3a\u84dd\u8272\u8868\u793a\u9884\u6d4b\u6b63\u786e\uff0c\u82e5\u9884\u6d4b\u5b57\u4f53\u4e3a\u7ea2\u8272\u8868\u793a\u9884\u6d4b\u9519\u8bef\u3002

visualize_model(model, loader_val)\n

"},{"location":"zh/tutorials/finetune/#2","title":"2. \u51bb\u7ed3\u7279\u5f81\u7f51\u7edc, \u5fae\u8c03\u5206\u7c7b\u5668","text":""},{"location":"zh/tutorials/finetune/#_13","title":"\u51bb\u7ed3\u7279\u5f81\u7f51\u7edc\u7684\u53c2\u6570","text":"

\u9996\u5148\uff0c\u6211\u4eec\u8981\u51bb\u7ed3\u9664\u6700\u540e\u4e00\u5c42\u5206\u7c7b\u5668\u4e4b\u5916\u7684\u6240\u6709\u7f51\u7edc\u5c42\uff0c\u5373\u5c06\u76f8\u5e94\u7684\u5c42\u53c2\u6570\u7684requires_grad\u5c5e\u6027\u8bbe\u7f6e\u4e3aFalse\uff0c\u4f7f\u5176\u4e0d\u5728\u53cd\u5411\u4f20\u64ad\u4e2d\u8ba1\u7b97\u68af\u5ea6\u53ca\u66f4\u65b0\u53c2\u6570\u3002

\u56e0\u4e3amindcv.models \u4e2d\u6240\u6709\u7684\u6a21\u578b\u5747\u4ee5classifier \u6765\u6807\u8bc6\u548c\u547d\u540d\u6a21\u578b\u7684\u5206\u7c7b\u5668(\u5373Dense\u5c42)\uff0c\u6240\u4ee5\u901a\u8fc7 classifier.weight \u548c classifier.bias \u5373\u53ef\u7b5b\u9009\u51fa\u5206\u7c7b\u5668\u5916\u7684\u5404\u5c42\u53c2\u6570\uff0c\u5c06\u5176requires_grad\u5c5e\u6027\u8bbe\u7f6e\u4e3aFalse.

# freeze backbone\nfor param in network.get_parameters():\n    if param.name not in [\"classifier.weight\", \"classifier.bias\"]:\n        param.requires_grad = False\n
"},{"location":"zh/tutorials/finetune/#_14","title":"\u5fae\u8c03\u5206\u7c7b\u5668","text":"

\u56e0\u4e3a\u7279\u5f81\u7f51\u7edc\u5df2\u7ecf\u56fa\u5b9a\uff0c\u6211\u4eec\u4e0d\u5fc5\u62c5\u5fc3\u8bad\u7ec3\u8fc7\u7a0b\u4f1a\u7834\u574f\u9884\u8bad\u7ec3\u7279\u5f81\uff08distort pretrained features\uff09\uff0c\u56e0\u6b64\uff0c\u76f8\u6bd4\u4e8e\u7b2c\u4e00\u79cd\u65b9\u6cd5\uff0c\u6211\u4eec\u53ef\u4ee5\u5c06learning rate\u8c03\u5927\u4e00\u4e9b\u3002

\u4e0e\u5fae\u8c03\u6574\u4f53\u6a21\u578b\u76f8\u6bd4\uff0c\u8bad\u7ec3\u5c06\u8282\u7ea6\u4e00\u5927\u534a\u65f6\u95f4\uff0c\u56e0\u4e3a\u6b64\u65f6\u53ef\u4ee5\u4e0d\u7528\u8ba1\u7b97\u90e8\u5206\u68af\u5ea6\u3002

# \u52a0\u8f7d\u6570\u636e\u96c6\ndataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)\nloader_train = create_loader(\n    dataset=dataset_train,\n    batch_size=16,\n    is_training=True,\n    num_classes=2,\n    transform=trans_train,\n    num_parallel_workers=num_workers,\n)\n\n# \u5b9a\u4e49\u4f18\u5316\u5668\u548c\u635f\u5931\u51fd\u6570\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-3)\nloss = create_loss(name='CE')\n\n# \u5b9e\u4f8b\u5316\u6a21\u578b\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\n\nmodel.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)\n
epoch: 1 step: 5, loss is 0.051333948969841\nepoch: 1 step: 10, loss is 0.02043312042951584\nepoch: 1 step: 15, loss is 0.16161368787288666\nTrain epoch time: 10228.601 ms, per step time: 681.907 ms\nepoch: 2 step: 5, loss is 0.002121545374393463\nepoch: 2 step: 10, loss is 0.0009798109531402588\nepoch: 2 step: 15, loss is 0.015776708722114563\nTrain epoch time: 562.543 ms, per step time: 37.503 ms\nepoch: 3 step: 5, loss is 0.008056879043579102\nepoch: 3 step: 10, loss is 0.0009347647428512573\nepoch: 3 step: 15, loss is 0.028648357838392258\nTrain epoch time: 523.249 ms, per step time: 34.883 ms\nepoch: 4 step: 5, loss is 0.001014217734336853\nepoch: 4 step: 10, loss is 0.0003159046173095703\nepoch: 4 step: 15, loss is 0.0007699579000473022\nTrain epoch time: 508.886 ms, per step time: 33.926 ms\nepoch: 5 step: 5, loss is 0.0015687644481658936\nepoch: 5 step: 10, loss is 0.012090332806110382\nepoch: 5 step: 15, loss is 0.004598274827003479\nTrain epoch time: 507.243 ms, per step time: 33.816 ms\nepoch: 6 step: 5, loss is 0.010022152215242386\nepoch: 6 step: 10, loss is 0.0066385045647621155\nepoch: 6 step: 15, loss is 0.0036080628633499146\nTrain epoch time: 517.646 ms, per step time: 34.510 ms\nepoch: 7 step: 5, loss is 0.01344013586640358\nepoch: 7 step: 10, loss is 0.0008538365364074707\nepoch: 7 step: 15, loss is 0.14135593175888062\nTrain epoch time: 511.513 ms, per step time: 34.101 ms\nepoch: 8 step: 5, loss is 0.01626245677471161\nepoch: 8 step: 10, loss is 0.02871556021273136\nepoch: 8 step: 15, loss is 0.010110966861248016\nTrain epoch time: 545.678 ms, per step time: 36.379 ms\nepoch: 9 step: 5, loss is 0.008498094975948334\nepoch: 9 step: 10, loss is 0.2588501274585724\nepoch: 9 step: 15, loss is 0.0014278888702392578\nTrain epoch time: 499.243 ms, per step time: 33.283 ms\nepoch: 10 step: 5, loss is 0.021337147802114487\nepoch: 10 step: 10, loss is 0.00829876959323883\nepoch: 10 step: 15, loss is 0.008352771401405334\nTrain epoch time: 465.600 ms, per step time: 31.040 ms\n
"},{"location":"zh/tutorials/finetune/#_15","title":"\u6a21\u578b\u8bc4\u4f30","text":"

\u8bad\u7ec3\u5b8c\u6210\u4e4b\u540e\uff0c\u6211\u4eec\u5728\u9a8c\u8bc1\u96c6\u4e0a\u8bc4\u4f30\u6a21\u578b\u7684\u51c6\u786e\u7387\u3002

dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)\nloader_val = create_loader(\n    dataset=dataset_val,\n    batch_size=5,\n    is_training=True,\n    num_classes=2,\n    transform=trans_val,\n    num_parallel_workers=num_workers,\n)\n\nres = model.eval(loader_val)\nprint(res)\n
{'accuracy': 1.0}\n
"},{"location":"zh/tutorials/finetune/#_16","title":"\u53ef\u89c6\u5316\u6a21\u578b\u9884\u6d4b","text":"

\u4f7f\u7528\u5fae\u8c03\u8fc7\u540e\u7684\u6a21\u578b\u5bf9\u9a8c\u8bc1\u96c6\u7684\u72fc\u548c\u72d7\u56fe\u50cf\u6570\u636e\u8fdb\u884c\u9884\u6d4b\u3002\u82e5\u9884\u6d4b\u5b57\u4f53\u4e3a\u84dd\u8272\u8868\u793a\u9884\u6d4b\u6b63\u786e\uff0c\u82e5\u9884\u6d4b\u5b57\u4f53\u4e3a\u7ea2\u8272\u8868\u793a\u9884\u6d4b\u9519\u8bef\u3002

visualize_model(model, loader_val)\n

\u5fae\u8c03\u540e\u7684\u72fc\u72d7\u9884\u6d4b\u7ed3\u679c\u5747\u6b63\u786e

"},{"location":"zh/tutorials/inference/","title":"\u56fe\u50cf\u5206\u7c7b\u9884\u6d4b","text":"

\u672c\u6559\u7a0b\u4ecb\u7ecd\u5982\u4f55\u5728MindCV\u4e2d\u8c03\u7528\u9884\u8bad\u7ec3\u6a21\u578b\uff0c\u5728\u6d4b\u8bd5\u56fe\u50cf\u4e0a\u8fdb\u884c\u5206\u7c7b\u9884\u6d4b\u3002

"},{"location":"zh/tutorials/inference/#_2","title":"\u6a21\u578b\u52a0\u8f7d","text":""},{"location":"zh/tutorials/inference/#_3","title":"\u67e5\u770b\u5168\u90e8\u53ef\u7528\u7684\u7f51\u7edc\u6a21\u578b","text":"

\u901a\u8fc7\u8c03\u7528mindcv.models\u4e2d\u7684registry.list_models\u51fd\u6570\uff0c\u53ef\u4ee5\u6253\u5370\u51fa\u5168\u90e8\u7f51\u7edc\u6a21\u578b\u7684\u540d\u5b57\uff0c\u4e00\u4e2a\u7f51\u7edc\u5728\u4e0d\u540c\u53c2\u6570\u914d\u7f6e\u4e0b\u7684\u6a21\u578b\u4e5f\u4f1a\u5206\u522b\u6253\u5370\u51fa\u6765\uff0c\u4f8b\u5982resnet18 / resnet34 / resnet50 / resnet101 / resnet152\u3002

import sys\nsys.path.append(\"..\")\nfrom mindcv.models import registry\nregistry.list_models()\n
['BiT_resnet50',\n 'repmlp_b224',\n 'repmlp_b256',\n 'repmlp_d256',\n 'repmlp_l256',\n 'repmlp_t224',\n 'repmlp_t256',\n 'convit_base',\n 'convit_base_plus',\n 'convit_small',\n ...\n 'visformer_small',\n 'visformer_small_v2',\n 'visformer_tiny',\n 'visformer_tiny_v2',\n 'vit_b_16_224',\n 'vit_b_16_384',\n 'vit_b_32_224',\n 'vit_b_32_384',\n 'vit_l_16_224',\n 'vit_l_16_384',\n 'vit_l_32_224',\n 'xception']\n
"},{"location":"zh/tutorials/inference/#_4","title":"\u52a0\u8f7d\u9884\u8bad\u7ec3\u6a21\u578b","text":"

\u6211\u4eec\u4ee5resnet50\u6a21\u578b\u4e3a\u4f8b\uff0c\u4ecb\u7ecd\u4e24\u79cd\u4f7f\u7528mindcv.models\u4e2dcreate_model\u51fd\u6570\u8fdb\u884c\u6a21\u578bcheckpoint\u52a0\u8f7d\u7684\u65b9\u6cd5\u3002

1). \u5f53\u63a5\u53e3\u4e2d\u7684pretrained\u53c2\u6570\u8bbe\u7f6e\u4e3aTrue\u65f6\uff0c\u53ef\u4ee5\u81ea\u52a8\u4e0b\u8f7d\u7f51\u7edc\u6743\u91cd\u3002

from mindcv.models import create_model\nmodel = create_model(model_name='resnet50', num_classes=1000, pretrained=True)\n# \u5207\u6362\u7f51\u7edc\u7684\u6267\u884c\u903b\u8f91\u4e3a\u63a8\u7406\u573a\u666f\nmodel.set_train(False)\n
102453248B [00:16, 6092186.31B/s]\n\nResNet<\n  (conv1): Conv2d<input_channels=3, output_channels=64, kernel_size=(7, 7), stride=(2, 2), pad_mode=pad, padding=3, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>\n  (bn1): BatchNorm2d<num_features=64, eps=1e-05, momentum=0.9, gamma=Parameter (name=bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True), beta=Parameter (name=bn1.beta, shape=(64,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=bn1.moving_mean, shape=(64,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=bn1.moving_variance, shape=(64,), dtype=Float32, requires_grad=False)>\n  (relu): ReLU<>\n  (max_pool): MaxPool2d<kernel_size=3, stride=2, pad_mode=SAME>\n  ...\n  (pool): GlobalAvgPooling<>\n  (classifier): Dense<input_channels=2048, output_channels=1000, has_bias=True>\n  >\n

2). \u5f53\u63a5\u53e3\u4e2d\u7684checkpoint_path\u53c2\u6570\u8bbe\u7f6e\u4e3a\u6587\u4ef6\u8def\u5f84\u65f6\uff0c\u53ef\u4ee5\u4ece\u672c\u5730\u52a0\u8f7d\u540e\u7f00\u4e3a.ckpt\u7684\u6a21\u578b\u53c2\u6570\u6587\u4ef6\u3002

from mindcv.models import create_model\nmodel = create_model(model_name='resnet50', num_classes=1000, checkpoint_path='./resnet50_224.ckpt')\n# \u5207\u6362\u7f51\u7edc\u7684\u6267\u884c\u903b\u8f91\u4e3a\u63a8\u7406\u573a\u666f\nmodel.set_train(False)\n
"},{"location":"zh/tutorials/inference/#_5","title":"\u6570\u636e\u51c6\u5907","text":""},{"location":"zh/tutorials/inference/#_6","title":"\u6784\u9020\u6570\u636e\u96c6","text":"

\u8fd9\u91cc\uff0c\u6211\u4eec\u4e0b\u8f7d\u4e00\u5f20Wikipedia\u7684\u56fe\u7247\u4f5c\u4e3a\u6d4b\u8bd5\u56fe\u7247\uff0c\u4f7f\u7528mindcv.data\u4e2d\u7684create_dataset\u51fd\u6570\uff0c\u4e3a\u5355\u5f20\u56fe\u7247\u6784\u9020\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u3002

from mindcv.data import create_dataset\nnum_workers = 1\n# \u6570\u636e\u96c6\u76ee\u5f55\u8def\u5f84\ndata_dir = \"./data/\"\ndataset = create_dataset(root=data_dir, split='test', num_parallel_workers=num_workers)\n# \u56fe\u50cf\u53ef\u89c6\nfrom PIL import Image\nImage.open(\"./data/test/dog/dog.jpg\")\n

"},{"location":"zh/tutorials/inference/#_7","title":"\u6570\u636e\u9884\u5904\u7406","text":"

\u901a\u8fc7\u8c03\u7528create_transforms\u51fd\u6570\uff0c\u83b7\u5f97\u9884\u8bad\u7ec3\u6a21\u578b\u4f7f\u7528\u7684ImageNet\u6570\u636e\u96c6\u7684\u6570\u636e\u5904\u7406\u7b56\u7565(transform list)\u3002

\u6211\u4eec\u5c06\u5f97\u5230\u7684transform list\u4f20\u5165create_loader\u51fd\u6570\uff0c\u6307\u5b9abatch_size=1\u548c\u5176\u4ed6\u53c2\u6570\uff0c\u5373\u53ef\u5b8c\u6210\u6d4b\u8bd5\u6570\u636e\u7684\u51c6\u5907\uff0c\u8fd4\u56deDataset Object\uff0c\u4f5c\u4e3a\u6a21\u578b\u7684\u8f93\u5165\u3002

from mindcv.data import create_transforms, create_loader\ntransforms_list = create_transforms(dataset_name='imagenet', is_training=False)\ndata_loader = create_loader(\n    dataset=dataset,\n    batch_size=1,\n    is_training=False,\n    num_classes=1000,\n    transform=transforms_list,\n    num_parallel_workers=num_workers\n)\n
"},{"location":"zh/tutorials/inference/#_8","title":"\u6a21\u578b\u63a8\u7406","text":"

\u5c06\u81ea\u5b9a\u4e49\u6570\u636e\u96c6\u7684\u56fe\u7247\u4f20\u5165\u6a21\u578b\uff0c\u83b7\u5f97\u63a8\u7406\u7684\u7ed3\u679c\u3002\u8fd9\u91cc\u4f7f\u7528mindspore.ops\u7684Squeeze\u51fd\u6570\u53bb\u9664batch\u7ef4\u5ea6\u3002

import mindspore.ops as P\nimport numpy as np\nimages, _ = next(data_loader.create_tuple_iterator())\noutput = P.Squeeze()(model(images))\npred = np.argmax(output.asnumpy())\n
with open(\"imagenet1000_clsidx_to_labels.txt\") as f:\n    idx2label = eval(f.read())\nprint('predict: {}'.format(idx2label[pred]))\n
predict: Labrador retriever\n
"},{"location":"zh/tutorials/quick_start/","title":"\u5feb\u901f\u5165\u95e8","text":"

MindCV\u662f\u4e00\u4e2a\u57fa\u4e8eMindSpore\u5f00\u53d1\u7684\uff0c\u81f4\u529b\u4e8e\u8ba1\u7b97\u673a\u89c6\u89c9\u76f8\u5173\u6280\u672f\u7814\u53d1\u7684\u5f00\u6e90\u5de5\u5177\u7bb1\u3002 \u5b83\u63d0\u4f9b\u5927\u91cf\u7684\u8ba1\u7b97\u673a\u89c6\u89c9\u9886\u57df\u7684\u7ecf\u5178\u6a21\u578b\u548cSoTA\u6a21\u578b\u4ee5\u53ca\u5b83\u4eec\u7684\u9884\u8bad\u7ec3\u6743\u91cd\u3002\u540c\u65f6\uff0c\u8fd8\u63d0\u4f9b\u4e86AutoAugment\u7b49SoTA\u7b97\u6cd5\u6765\u63d0\u9ad8\u6027\u80fd\u3002 \u901a\u8fc7\u89e3\u8026\u7684\u6a21\u5757\u8bbe\u8ba1\uff0c\u60a8\u53ef\u4ee5\u8f7b\u677e\u5730\u5c06MindCV\u5e94\u7528\u5230\u60a8\u81ea\u5df1\u7684CV\u4efb\u52a1\u4e2d\u3002\u672c\u6559\u7a0b\u4e2d\u6211\u4eec\u5c06\u63d0\u4f9b\u4e00\u4e2a\u5feb\u901f\u4e0a\u624bMindCV\u7684\u6307\u5357\u3002

\u672c\u6559\u7a0b\u5c06\u4ee5DenseNet\u5206\u7c7b\u6a21\u578b\u4e3a\u4f8b\uff0c\u5b9e\u73b0\u5bf9CIFAR-10\u6570\u636e\u96c6\u7684\u8fc1\u79fb\u5b66\u4e60\uff0c\u5e76\u5728\u6b64\u6d41\u7a0b\u4e2d\u5bf9MindCV\u5404\u6a21\u5757\u7684\u7528\u6cd5\u4f5c\u8bb2\u89e3\u3002

"},{"location":"zh/tutorials/quick_start/#_2","title":"\u73af\u5883\u51c6\u5907","text":"

\u8be6\u89c1\u5b89\u88c5\u3002

"},{"location":"zh/tutorials/quick_start/#_3","title":"\u6570\u636e","text":""},{"location":"zh/tutorials/quick_start/#_4","title":"\u6570\u636e\u96c6","text":"

\u901a\u8fc7mindcv.data\u4e2d\u7684create_dataset\u6a21\u5757\uff0c\u6211\u4eec\u53ef\u4ee5\u5feb\u901f\u5730\u8bfb\u53d6\u6807\u51c6\u6570\u636e\u96c6\u6216\u81ea\u5b9a\u4e49\u7684\u6570\u636e\u96c6\u3002

import os\nfrom mindcv.data import create_dataset, create_transforms, create_loader\n\ncifar10_dir = './datasets/cifar/cifar-10-batches-bin'  # \u4f60\u7684\u6570\u636e\u5b58\u653e\u8def\u5f84\nnum_classes = 10  # \u7c7b\u522b\u6570\nnum_workers = 8  # \u6570\u636e\u8bfb\u53d6\u53ca\u52a0\u8f7d\u7684\u5de5\u4f5c\u7ebf\u7a0b\u6570\n\n# \u521b\u5efa\u6570\u636e\u96c6\ndataset_train = create_dataset(\n    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers\n)\n
"},{"location":"zh/tutorials/quick_start/#_5","title":"\u6570\u636e\u53d8\u6362","text":"

create_transforms\u51fd\u6570\u53ef\u76f4\u63a5\u751f\u6210\u9002\u914d\u6807\u51c6\u6570\u636e\u96c6\u7684\u6570\u636e\u5904\u7406\u589e\u5f3a\u7b56\u7565(transform list)\uff0c\u5305\u62ecCifar10, ImageNet\u4e0a\u5e38\u7528\u7684\u6570\u636e\u5904\u7406\u7b56\u7565\u3002

# \u521b\u5efa\u6240\u9700\u7684\u6570\u636e\u589e\u5f3a\u64cd\u4f5c\u7684\u5217\u8868\ntrans = create_transforms(dataset_name='cifar10', image_resize=224)\n
"},{"location":"zh/tutorials/quick_start/#_6","title":"\u6570\u636e\u52a0\u8f7d","text":"

\u901a\u8fc7mindcv.data.create_loader\u51fd\u6570\uff0c\u8fdb\u884c\u6570\u636e\u8f6c\u6362\u548cbatch\u5207\u5206\u52a0\u8f7d\uff0c\u6211\u4eec\u9700\u8981\u5c06create_transforms\u8fd4\u56de\u7684transform_list\u4f20\u5165\u3002

# \u6267\u884c\u6570\u636e\u589e\u5f3a\u64cd\u4f5c\uff0c\u751f\u6210\u6240\u9700\u6570\u636e\u96c6\u3002\nloader_train = create_loader(dataset=dataset_train,\n                             batch_size=64,\n                             is_training=True,\n                             num_classes=num_classes,\n                             transform=trans,\n                             num_parallel_workers=num_workers)\n\nnum_batches = loader_train.get_dataset_size()\n

\u5728notebook\u4e2d\u907f\u514d\u91cd\u590d\u6267\u884ccreate_loader\u5355\u4e2aCell\uff0c\u6216\u5728\u6267\u884ccreate_dataset\u4e4b\u540e\u518d\u6b21\u6267\u884c\u3002

"},{"location":"zh/tutorials/quick_start/#_7","title":"\u6a21\u578b\u521b\u5efa\u548c\u52a0\u8f7d","text":"

\u4f7f\u7528create_model\u63a5\u53e3\u83b7\u5f97\u5b9e\u4f8b\u5316\u7684DenseNet\uff0c\u5e76\u52a0\u8f7d\u9884\u8bad\u7ec3\u6743\u91cddensenet_121_224.ckpt\uff08ImageNet\u6570\u636e\u96c6\u8bad\u7ec3\u5f97\u5230\uff09\u3002

from mindcv.models import create_model\n\n# \u5b9e\u4f8b\u5316 DenseNet-121 \u6a21\u578b\u5e76\u52a0\u8f7d\u9884\u8bad\u7ec3\u6743\u91cd\u3002\nnetwork = create_model(model_name='densenet121', num_classes=num_classes, pretrained=True)\n

\u7531\u4e8eCIFAR-10\u548cImageNet\u6570\u636e\u96c6\u6240\u9700\u7c7b\u522b\u6570\u91cf\u4e0d\u540c\uff0c\u5206\u7c7b\u5668\u53c2\u6570\u65e0\u6cd5\u5171\u4eab\uff0c\u51fa\u73b0\u5206\u7c7b\u5668\u53c2\u6570\u65e0\u6cd5\u52a0\u8f7d\u7684\u544a\u8b66\u4e0d\u5f71\u54cd\u5fae\u8c03\u3002

"},{"location":"zh/tutorials/quick_start/#_8","title":"\u635f\u5931\u51fd\u6570","text":"

\u901a\u8fc7create_loss\u63a5\u53e3\u83b7\u5f97\u635f\u5931\u51fd\u6570

from mindcv.loss import create_loss\n\nloss = create_loss(name='CE')\n
"},{"location":"zh/tutorials/quick_start/#_9","title":"\u5b66\u4e60\u7387\u8c03\u5ea6\u5668","text":"

\u4f7f\u7528create_scheduler\u63a5\u53e3\u8bbe\u7f6e\u5b66\u4e60\u7387\u7b56\u7565\u3002

from mindcv.scheduler import create_scheduler\n\n# \u8bbe\u7f6e\u5b66\u4e60\u7387\u7b56\u7565\nlr_scheduler = create_scheduler(steps_per_epoch=num_batches,\n                                scheduler='constant',\n                                lr=0.0001)\n
"},{"location":"zh/tutorials/quick_start/#_10","title":"\u4f18\u5316\u5668","text":"

\u4f7f\u7528create_optimizer\u63a5\u53e3\u521b\u5efa\u4f18\u5316\u5668\u3002

from mindcv.optim import create_optimizer\n\n# \u8bbe\u7f6e\u4f18\u5316\u5668\nopt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler)\n
"},{"location":"zh/tutorials/quick_start/#_11","title":"\u8bad\u7ec3","text":"

\u4f7f\u7528mindspore.Model\u63a5\u53e3\u6839\u636e\u7528\u6237\u4f20\u5165\u7684\u53c2\u6570\u5c01\u88c5\u53ef\u8bad\u7ec3\u7684\u5b9e\u4f8b\u3002

from mindspore import Model\n\n# \u5c01\u88c5\u53ef\u8bad\u7ec3\u6216\u63a8\u7406\u7684\u5b9e\u4f8b\nmodel = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})\n

\u4f7f\u7528mindspore.Model.train\u63a5\u53e3\u8fdb\u884c\u6a21\u578b\u8bad\u7ec3\u3002

from mindspore import LossMonitor, TimeMonitor, CheckpointConfig, ModelCheckpoint\n\n# \u8bbe\u7f6e\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4fdd\u5b58\u7f51\u7edc\u53c2\u6570\u7684\u56de\u8c03\u51fd\u6570\nckpt_save_dir = './ckpt'\nckpt_config = CheckpointConfig(save_checkpoint_steps=num_batches)\nckpt_cb = ModelCheckpoint(prefix='densenet121-cifar10',\n                          directory=ckpt_save_dir,\n                          config=ckpt_config)\n\nmodel.train(5, loader_train, callbacks=[LossMonitor(num_batches//5), TimeMonitor(num_batches//5), ckpt_cb], dataset_sink_mode=False)\n
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:04:30.001.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op5273] don't support int64, reduce precision from int64 to int32.\n\n\nepoch: 1 step: 156, loss is 2.0816354751586914\nepoch: 1 step: 312, loss is 1.4474115371704102\nepoch: 1 step: 468, loss is 0.8935483694076538\nepoch: 1 step: 624, loss is 0.5588696002960205\nepoch: 1 step: 780, loss is 0.3161369860172272\n\n\n[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:09:20.261.851 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op16720] don't support int64, reduce precision from int64 to int32.\n\n\nTrain epoch time: 416429.509 ms, per step time: 532.519 ms\nepoch: 2 step: 154, loss is 0.19752007722854614\nepoch: 2 step: 310, loss is 0.14635677635669708\nepoch: 2 step: 466, loss is 0.3511860966682434\nepoch: 2 step: 622, loss is 0.12542471289634705\nepoch: 2 step: 778, loss is 0.22351759672164917\nTrain epoch time: 156746.872 ms, per step time: 200.444 ms\nepoch: 3 step: 152, loss is 0.08965137600898743\nepoch: 3 step: 308, loss is 0.22765043377876282\nepoch: 3 step: 464, loss is 0.19035443663597107\nepoch: 3 step: 620, loss is 0.06591956317424774\nepoch: 3 step: 776, loss is 0.0934530645608902\nTrain epoch time: 156574.210 ms, per step time: 200.223 ms\nepoch: 4 step: 150, loss is 0.03782692924141884\nepoch: 4 step: 306, loss is 0.023876197636127472\nepoch: 4 step: 462, loss is 0.038690414279699326\nepoch: 4 step: 618, loss is 0.15388774871826172\nepoch: 4 step: 774, loss is 0.1581358164548874\nTrain epoch time: 158398.108 ms, per step time: 202.555 ms\nepoch: 5 step: 148, loss is 0.06556802988052368\nepoch: 5 step: 304, loss is 0.006707251071929932\nepoch: 5 step: 460, loss is 0.02353120595216751\nepoch: 5 step: 616, loss is 0.014183484017848969\nepoch: 5 step: 772, loss is 0.09367241710424423\nTrain epoch time: 154978.618 ms, per step time: 198.182 ms\n
"},{"location":"zh/tutorials/quick_start/#_12","title":"\u8bc4\u4f30","text":"

\u73b0\u5728\u8ba9\u6211\u4eec\u5728CIFAR-10\u4e0a\u5bf9\u521a\u521a\u8bad\u7ec3\u7684\u6a21\u578b\u8fdb\u884c\u8bc4\u4f30\u3002

# \u52a0\u8f7d\u9a8c\u8bc1\u6570\u636e\u96c6\ndataset_val = create_dataset(name='cifar10', root=cifar10_dir, split='test', shuffle=True, num_parallel_workers=num_workers, download=download)\n\n# \u6267\u884c\u6570\u636e\u589e\u5f3a\u64cd\u4f5c\uff0c\u751f\u6210\u6240\u9700\u6570\u636e\u96c6\u3002\nloader_val = create_loader(dataset=dataset_val,\n                           batch_size=64,\n                           is_training=False,\n                           num_classes=num_classes,\n                           transform=trans,\n                           num_parallel_workers=num_workers)\n

\u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u53c2\u6570\u6587\u4ef6\uff08densenet121-cifar10-5_782.ckpt\uff09\u5230\u6a21\u578b\u3002

\u6839\u636e\u7528\u6237\u4f20\u5165\u7684\u53c2\u6570\u5c01\u88c5\u53ef\u63a8\u7406\u7684\u5b9e\u4f8b\uff0c\u52a0\u8f7d\u9a8c\u8bc1\u6570\u636e\u96c6\uff0c\u9a8c\u8bc1\u5fae\u8c03\u7684 DenseNet121\u6a21\u578b\u7cbe\u5ea6\u3002

# \u9a8c\u8bc1\u5fae\u8c03\u540e\u7684DenseNet121\u7684\u7cbe\u5ea6\nacc = model.eval(loader_val, dataset_sink_mode=False)\nprint(acc)\n
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:24:11.927.472 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op24314] don't support int64, reduce precision from int64 to int32.\n\n\n{'accuracy': 0.951}\n\n\n[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:25:01.871.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op27139] don't support int64, reduce precision from int64 to int32.\n
"},{"location":"zh/tutorials/quick_start/#yaml","title":"\u4f7f\u7528YAML\u6587\u4ef6\u8fdb\u884c\u6a21\u578b\u8bad\u7ec3\u548c\u9a8c\u8bc1","text":"

\u6211\u4eec\u8fd8\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528\u8bbe\u7f6e\u597d\u6a21\u578b\u53c2\u6570\u7684yaml\u6587\u4ef6\uff0c\u901a\u8fc7train.py\u548cvalidate.py\u811a\u672c\u6765\u5feb\u901f\u6765\u5bf9\u6a21\u578b\u8fdb\u884c\u8bad\u7ec3\u548c\u9a8c\u8bc1\u3002\u4ee5\u4e0b\u662f\u5728ImageNet\u4e0a\u8bad\u7ec3SqueezenetV1\u7684\u793a\u4f8b \uff08\u9700\u8981\u5c06ImageNet\u63d0\u524d\u4e0b\u8f7d\u5230\u76ee\u5f55\u4e0b\uff09

\u8be6\u7ec6\u6559\u7a0b\u8bf7\u53c2\u8003 \u4f7f\u7528yaml\u6587\u4ef6\u7684\u6559\u7a0b

#  \u5355\u5361\u8bad\u7ec3\npython train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --distribute False\n
python validate.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --ckpt_path /path/to/ckpt\n
"}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 000000000..25ff84f41 --- /dev/null +++ b/sitemap.xml @@ -0,0 +1,163 @@ + + + + https://mindspore-lab.github.io/mindcv/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/installation/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/modelzoo/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/how_to_guides/write_a_new_model/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/notes/changelog/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/notes/code_of_conduct/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/notes/contributing/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/notes/faq/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/data/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/loss/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/models.layers/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/models/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/optim/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/scheduler/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/reference/utils/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/tutorials/configuration/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/tutorials/deployment/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/tutorials/finetune/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/tutorials/inference/ + 2023-07-18 + daily + + + + + + https://mindspore-lab.github.io/mindcv/tutorials/quick_start/ + 2023-07-18 + daily + + + + + \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz new file mode 100644 index 000000000..210fc3c3e Binary files /dev/null and b/sitemap.xml.gz differ diff --git a/tutorials/configuration/index.html b/tutorials/configuration/index.html new file mode 100644 index 000000000..070dfaffc --- /dev/null +++ b/tutorials/configuration/index.html @@ -0,0 +1,1635 @@ + + + + + + + + + + + + + + + + + + + + + + + + Configuration - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Configuration

+

Download Notebook

+

MindCV can parse a model's yaml configuration file through the argparse and PyYAML libraries to configure its parameters. +Let's use the squeezenet_1.0 model as an example to explain how to configure the corresponding parameters.

+

Basic Environment

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    mode: Use graph mode (0) or pynative mode (1).

    +
  • +
  • +

    distribute: Whether to use distributed training.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    mode: 0
    +distribute: True
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py --mode 0 --distribute False ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    +

    args.mode corresponds to the parameter mode, and args.distribute corresponds to the parameter distribute.

    +
    +
    def train(args):
    +    ms.set_context(mode=args.mode)
    +
    +    if args.distribute:
    +        init()
    +        device_num = get_group_size()
    +        rank_id = get_rank()
    +        ms.set_auto_parallel_context(device_num=device_num,
    +                                     parallel_mode='data_parallel',
    +                                     gradients_mean=True)
    +    else:
    +        device_num = None
    +        rank_id = None
    +    ...
    +
    +
  6. +
+

Dataset

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    dataset: dataset name.

    +
  • +
  • +

    data_dir: Path of dataset file.

    +
  • +
  • +

    shuffle: whether to shuffle the dataset.

    +
  • +
  • +

    dataset_download: whether to download the dataset.

    +
  • +
  • +

    batch_size: The number of rows in each batch.

    +
  • +
  • +

    drop_remainder: Determines whether to drop the last block whose data row number is less than the batch size.

    +
  • +
  • +

    num_parallel_workers: Number of workers (threads) used to process the dataset in parallel.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    dataset: 'imagenet'
    +data_dir: './imagenet2012'
    +shuffle: True
    +dataset_download: False
    +batch_size: 32
    +drop_remainder: True
    +num_parallel_workers: 8
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --dataset imagenet --data_dir ./imagenet2012 --shuffle True \
    +    --dataset_download False --batch_size 32 --drop_remainder True \
    +    --num_parallel_workers 8 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    dataset_train = create_dataset(
    +        name=args.dataset,
    +        root=args.data_dir,
    +        split='train',
    +        shuffle=args.shuffle,
    +        num_samples=args.num_samples,
    +        num_shards=device_num,
    +        shard_id=rank_id,
    +        num_parallel_workers=args.num_parallel_workers,
    +        download=args.dataset_download,
    +        num_aug_repeats=args.aug_repeats)
    +
    +    ...
    +    target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None
    +
    +    loader_train = create_loader(
    +        dataset=dataset_train,
    +        batch_size=args.batch_size,
    +        drop_remainder=args.drop_remainder,
    +        is_training=True,
    +        mixup=args.mixup,
    +        cutmix=args.cutmix,
    +        cutmix_prob=args.cutmix_prob,
    +        num_classes=args.num_classes,
    +        transform=transform_list,
    +        target_transform=target_transform,
    +        num_parallel_workers=args.num_parallel_workers,
    +    )
    +    ...
    +
    +
  6. +
+

Data Augmentation

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    image_resize: the image size after resizing, to adapt to the network input.

    +
  • +
  • +

    scale: random resize scale.

    +
  • +
  • +

    ratio: random resize aspect ratio.

    +
  • +
  • +

    hflip: horizontal flip augmentation probability during training.

    +
  • +
  • +

    interpolation: image interpolation mode for resize operator.

    +
  • +
  • +

    crop_pct: input image center crop percent.

    +
  • +
  • +

    color_jitter: color jitter factor.

    +
  • +
  • +

    re_prob: the probability of performing random erasing.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    image_resize: 224
    +scale: [0.08, 1.0]
    +ratio: [0.75, 1.333]
    +hflip: 0.5
    +interpolation: 'bilinear'
    +crop_pct: 0.875
    +color_jitter: [0.4, 0.4, 0.4]
    +re_prob: 0.5
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --image_resize 224 --scale [0.08, 1.0] --ratio [0.75, 1.333] \
    +    --hflip 0.5 --interpolation "bilinear" --crop_pct 0.875 \
    +    --color_jitter [0.4, 0.4, 0.4] --re_prob 0.5 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    transform_list = create_transforms(
    +        dataset_name=args.dataset,
    +        is_training=True,
    +        image_resize=args.image_resize,
    +        scale=args.scale,
    +        ratio=args.ratio,
    +        hflip=args.hflip,
    +        vflip=args.vflip,
    +        color_jitter=args.color_jitter,
    +        interpolation=args.interpolation,
    +        auto_augment=args.auto_augment,
    +        mean=args.mean,
    +        std=args.std,
    +        re_prob=args.re_prob,
    +        re_scale=args.re_scale,
    +        re_ratio=args.re_ratio,
    +        re_value=args.re_value,
    +        re_max_attempts=args.re_max_attempts
    +    )
    +    ...
    +
    +
  6. +
+

Model

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    model: model name.

    +
  • +
  • +

    num_classes: number of label classes.

    +
  • +
  • +

    pretrained: whether to load the pretrained model weights.

    +
  • +
  • +

    ckpt_path: initialize model from this checkpoint.

    +
  • +
  • +

    keep_checkpoint_max: max number of checkpoint files.

    +
  • +
  • +

    ckpt_save_dir: the directory in which to save checkpoints.

    +
  • +
  • +

    epoch_size: train epoch size.

    +
  • +
  • +

    dataset_sink_mode: whether to enable dataset sink mode.

    +
  • +
  • +

    amp_level: auto mixed precision level for saving memory and acceleration.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    model: 'squeezenet1_0'
    +num_classes: 1000
    +pretrained: False
    +ckpt_path: './squeezenet1_0_gpu.ckpt'
    +keep_checkpoint_max: 10
    +ckpt_save_dir: './ckpt/'
    +epoch_size: 200
    +dataset_sink_mode: True
    +amp_level: 'O0'
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --model squeezenet1_0 --num_classes 1000 --pretrained False \
    +    --ckpt_path ./squeezenet1_0_gpu.ckpt --keep_checkpoint_max 10 \
    +    --ckpt_save_dir ./ckpt/ --epoch_size 200 --dataset_sink_mode True \
    +    --amp_level O0 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    network = create_model(model_name=args.model,
    +        num_classes=args.num_classes,
    +        in_channels=args.in_channels,
    +        drop_rate=args.drop_rate,
    +        drop_path_rate=args.drop_path_rate,
    +        pretrained=args.pretrained,
    +        checkpoint_path=args.ckpt_path,
    +        ema=args.ema
    +    )
    +    ...
    +
    +
  6. +
+

Loss Function

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    loss: name of loss function, BCE (BinaryCrossEntropy) or CE (CrossEntropy).

    +
  • +
  • +

    label_smoothing: use label smoothing.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    loss: 'CE'
    +label_smoothing: 0.1
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --loss CE --label_smoothing 0.1 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    loss = create_loss(name=args.loss,
    +        reduction=args.reduction,
    +        label_smoothing=args.label_smoothing,
    +        aux_factor=args.aux_factor
    +     )
    +    ...
    +
    +
  6. +
+

Learning Rate Scheduler

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    scheduler: name of scheduler.

    +
  • +
  • +

    min_lr: the minimum learning rate, if the scheduler supports it.

    +
  • +
  • +

    lr: learning rate.

    +
  • +
  • +

    warmup_epochs: warmup epochs.

    +
  • +
  • +

    decay_epochs: decay epochs.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    scheduler: 'cosine_decay'
    +min_lr: 0.0
    +lr: 0.01
    +warmup_epochs: 0
    +decay_epochs: 200
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --scheduler cosine_decay --min_lr 0.0 --lr 0.01 \
    +    --warmup_epochs 0 --decay_epochs 200 ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    lr_scheduler = create_scheduler(num_batches,
    +        scheduler=args.scheduler,
    +        lr=args.lr,
    +        min_lr=args.min_lr,
    +        warmup_epochs=args.warmup_epochs,
    +        warmup_factor=args.warmup_factor,
    +        decay_epochs=args.decay_epochs,
    +        decay_rate=args.decay_rate,
    +        milestones=args.multi_step_decay_milestones,
    +        num_epochs=args.epoch_size,
    +        lr_epoch_stair=args.lr_epoch_stair
    +    )
    +    ...
    +
    +
  6. +
+

Optimizer

+
    +
  1. Parameter description
  2. +
+
    +
  • +

    opt: name of optimizer.

    +
  • +
  • +

    filter_bias_and_bn: whether to filter bias and BatchNorm parameters (typically excluding them from weight decay).

    +
  • +
  • +

    momentum: float hyperparameter, the momentum for the moving average.

    +
  • +
  • +

    weight_decay: weight decay (L2 penalty).

    +
  • +
  • +

    loss_scale: gradient scaling factor.

    +
  • +
  • +

    use_nesterov: whether to enable Nesterov momentum.

    +
  • +
+
    +
  1. +

    Sample yaml file

    +
    opt: 'momentum'
    +filter_bias_and_bn: True
    +momentum: 0.9
    +weight_decay: 0.00007
    +loss_scale: 1024
    +use_nesterov: False
    +...
    +
    +
  2. +
  3. +

    Parse parameter setting

    +
    python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \
    +    --loss_scale 1024 --use_nesterov False ...
    +
    +
  4. +
  5. +

    Corresponding code example

    +
    def train(args):
    +    ...
    +    if args.ema:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            loss_scale=args.loss_scale,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    else:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    ...
    +
    +
  6. +
+

Combination of Yaml and Parse

+

You can override the parameter settings in the yaml file by passing the corresponding command-line arguments. Take the following shell command as an example:

+
python train.py -c ./configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir ./data
+
+

The above command overrides the args.data_dir parameter, changing it from ./imagenet2012 (the value in the yaml file) to ./data.
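
Under the hood, this is plain argparse combined with PyYAML: the yaml file fills in the parser defaults, and any flag given explicitly on the command line wins. The sketch below only illustrates that merge order and is not the exact parser used by train.py (only -c/--config and --data_dir are taken from the commands above; everything else is an assumption):

import argparse
+import yaml  # PyYAML
+
+# Minimal sketch: yaml values become argparse defaults, explicit CLI flags override them.
+parser = argparse.ArgumentParser()
+parser.add_argument('-c', '--config', default='./configs/squeezenet/squeezenet_1.0_gpu.yaml')
+parser.add_argument('--data_dir', default=None)
+cli, _ = parser.parse_known_args()
+
+with open(cli.config) as f:
+    cfg = yaml.safe_load(f)      # e.g. {'data_dir': './imagenet2012', ...}
+
+parser.set_defaults(**cfg)       # yaml supplies the defaults
+args = parser.parse_args()       # flags passed on the command line take precedence
+print(args.data_dir)             # -> './data' when --data_dir ./data is given
+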

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/deployment/index.html b/tutorials/deployment/index.html new file mode 100644 index 000000000..830b2ca70 --- /dev/null +++ b/tutorials/deployment/index.html @@ -0,0 +1,1290 @@ + + + + + + + + + + + + + + + + + + + + + + + + Deployment - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Inference Service Deployment

+

MindSpore Serving is a lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training on MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model.

+

This tutorial uses mobilenet_v2_100 network as an example to describe how to deploy the Inference Service based on MindSpore Serving.

+

Environment Preparation

+

Before deploying, ensure that MindSpore Serving has been properly installed and the environment variables are configured. To install and configure MindSpore Serving on your PC, go to the MindSpore Serving installation page.

+

Exporting the Model

+

To implement cross-platform or hardware inference (e.g., on an Ascend AI processor, the MindSpore device side, or a GPU), a model file in MindIR format should be generated from the network definition and checkpoint. In MindSpore, the function for exporting the network model is export, and its main parameters are as follows:

+
    +
  • net: MindSpore network structure.
  • +
  • inputs: Network input; the supported input type is Tensor. If there are multiple inputs, pass them all in at the same time, for example, ms.export(network, ms.Tensor(input1), ms.Tensor(input2), file_name='network', file_format='MINDIR').
  • +
  • file_name: Name of the exported model file. If file_name doesn't contain the corresponding suffix (for example, .mindir), the system will automatically add one after file_format is set.
  • +
  • file_format: MindSpore currently supports ‘AIR’, ‘ONNX’ and ‘MINDIR’ formats for exported models.
  • +
+

The following code uses mobilenet_v2_100 as an example to export the pretrained network model of MindCV and obtain the model file in MindIR format.

+
from mindcv.models import create_model
+import numpy as np
+import mindspore as ms
+
+model = create_model(model_name='mobilenet_v2_100', num_classes=1000, pretrained=True)
+
+input_np = np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]).astype(np.float32)
+
+# Export mobilenet_v2_100.mindir to the current folder.
+ms.export(model, ms.Tensor(input_np), file_name='mobilenet_v2_100', file_format='MINDIR')
+
+

Deploying the Serving Inference Service

+

Configuring the Service

+

Start Serving with the following files:

+
demo
+├── mobilenet_v2_100
+│   ├── 1
+│   │   └── mobilenet_v2_100.mindir
+│   └── servable_config.py
+│── serving_server.py
+├── serving_client.py
+├── imagenet1000_clsidx_to_labels.txt
+└── test_image
+    ├─ dog
+    │   ├─ dog.jpg
+    │   └─ ……
+    └─ ……
+
+
    +
  • mobilenet_v2_100: Model folder. The folder name is the model name.
  • +
  • mobilenet_v2_100.mindir: Model file generated by the network in the previous step, which is stored in folder 1 (the number indicates the version number). Different versions are stored in different folders. The version number must be a string of digits. By default, the latest model file is started.
  • +
  • servable_config.py: Model configuration script. Declare the model and specify the input and output parameters of the model.
  • +
  • serving_server.py: Script to start the Serving server.
  • +
  • serving_client.py: Script to start the Python client.
  • +
  • imagenet1000_clsidx_to_labels.txt: Index of 1000 labels for the ImageNet dataset, available at examples/data/.
  • +
  • test_image: Test images, available at README.
  • +
+

Content of the configuration file servable_config.py:

+
from mindspore_serving.server import register
+
+# Declare the model. The parameter model_file indicates the name of the model file and model_format indicates the model type.
+model = register.declare_model(model_file="mobilenet_v2_100.mindir", model_format="MindIR")
+
+# The input parameters of the Servable method are specified by the input parameters of the Python method. The output parameters of the Servable method are specified by the output_names of register_method.
+@register.register_method(output_names=["score"])
+def predict(image):
+    x = register.add_stage(model, image, outputs_count=1)
+    return x
+
+

Starting the Service

+

The MindSpore Serving server can provide services through either gRPC or RESTful interfaces. The following uses gRPC as an example. The startup script serving_server.py deploys the mobilenet_v2_100 servable in the local directory to device 0 and starts a gRPC server at 127.0.0.1:5500. Content of the script:

+
import os
+import sys
+from mindspore_serving import server
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="mobilenet_v2_100",
+                                                 device_ids=0)
+    server.start_servables(servable_configs=servable_config)
+    server.start_grpc_server(address="127.0.0.1:5500")
+
+if __name__ == "__main__":
+    start()
+
+

If the following log information is displayed on the server, the gRPC service is started successfully.

+
Serving gRPC server start success, listening on 127.0.0.1:5500
+
+
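
MindSpore Serving can also expose a RESTful endpoint instead of (or in addition to) gRPC. Assuming your installed version provides server.start_restful_server (the address below is illustrative), the only change to the script above is the startup call:

# RESTful variant: replace (or complement) the gRPC startup call in start().
+# Assumption: start_restful_server is available in your MindSpore Serving version; the address is illustrative.
+server.start_restful_server(address="127.0.0.1:1500")
+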

Inference Execution

+

Start the Python client by using serving_client.py. The client script uses the create_transforms, create_dataset and create_loader functions of mindcv.data to preprocess the image and send it to the Serving server, then postprocesses the result returned by the server and prints the predicted label of the image.

+
import os
+from mindspore_serving.client import Client
+import numpy as np
+from mindcv.data import create_transforms, create_dataset, create_loader
+
+num_workers = 1
+
+# Dataset directory path
+data_dir = "./test_image/"
+
+dataset = create_dataset(root=data_dir, split='', num_parallel_workers=num_workers)
+transforms_list = create_transforms(dataset_name='ImageNet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+
+def postprocess(score):
+    max_idx = np.argmax(score)
+    return idx2label[max_idx]
+
+def predict():
+    client = Client("127.0.0.1:5500", "mobilenet_v2_100", "predict")
+    instances = []
+    images, _ = next(data_loader.create_tuple_iterator())
+    image_np = images.asnumpy().squeeze()
+    instances.append({"image": image_np})
+    result = client.infer(instances)
+
+    for instance in result:
+        label = postprocess(instance["score"])
+        print(label)
+
+if __name__ == '__main__':
+    predict()
+
+

If the following information is displayed, the Serving service has correctly executed the inference of the mobilenet_v2_100 model: +

Labrador retriever
+

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/finetune/index.html b/tutorials/finetune/index.html new file mode 100644 index 000000000..c91e32617 --- /dev/null +++ b/tutorials/finetune/index.html @@ -0,0 +1,1706 @@ + + + + + + + + + + + + + + + + + + + + + + + + Finetune - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Model Fine-Tuning Training

+

Download Notebook

+

In this tutorial, you will learn how to use MindCV for transfer learning to solve image classification problems on custom datasets. +In deep learning tasks, we often face the problem of insufficient training data, which makes it difficult to train the entire network from scratch to the desired accuracy. +A better approach is to start from a model pretrained on a large dataset (close to the task data), and then either use it to initialize the network's weight parameters or apply it to the specific task as a fixed feature extractor.

+

This tutorial will use the DenseNet model pretrained on ImageNet as an example to introduce two different fine-tuning strategies to solve the image classification problem of wolves and dogs in the case of small samples:

+
    +
  1. Overall model fine-tuning.
  2. +
  3. Freeze backbone and only fine-tune the classifier.
  4. +
+
+

For details of transfer learning, see Stanford University CS231n

+
+

Data Preparation

+

Download Dataset

+

Download the dog and wolf classification dataset used in this case. +Each category has 120 training images and 30 validation images. Use the mindcv.utils.download interface to download the dataset and automatically unzip it to the current directory.

+
import os
+from mindcv.utils.download import DownLoad
+
+dataset_url = "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip"
+root_dir = "./"
+
+if not os.path.exists(os.path.join(root_dir, 'data/Canidae')):
+    DownLoad().download_and_extract_archive(dataset_url, root_dir)
+
+

The directory structure of the dataset is as follows:

+
data/
+└── Canidae
+    ├── train
+    │   ├── dogs
+    │   └── wolves
+    └── val
+        ├── dogs
+        └── wolves
+
+

Dataset Loading and Processing

+

Loading Custom Datasets

+

By calling the create_dataset function in mindcv.data, we can easily load preset and customized datasets.

+
    +
  • When the parameter name is left as null (the default), a user-defined dataset is loaded.
  • +
  • When the parameter name is set to MNIST, CIFAR10, or another standard dataset name, the corresponding preset dataset is loaded.
  • +
+

At the same time, we need to set the dataset path data_dir and the data split name split (such as train or val) to load the corresponding training or validation set.

+
from mindcv.data import create_dataset, create_transforms, create_loader
+
+num_workers = 8
+
+# path of dataset
+data_dir = "./data/Canidae/"
+
+# load dataset
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+
+
+

Note: The directory structure of the custom dataset should be the same as ImageNet, that is, the hierarchy root -> split -> class -> image.

+
+
DATASET_NAME
+    ├── split1(e.g. train)/
+    │  ├── class1/
+    │  │   ├── 000001.jpg
+    │  │   ├── 000002.jpg
+    │  │   └── ....
+    │  └── class2/
+    │      ├── 000001.jpg
+    │      ├── 000002.jpg
+    │      └── ....
+    └── split2/
+       ├── class1/
+       │   ├── 000001.jpg
+       │   ├── 000002.jpg
+       │   └── ....
+       └── class2/
+           ├── 000001.jpg
+           ├── 000002.jpg
+           └── ....
+
+

Data Processing and Augmentation

+

First, we call the create_transforms function to obtain the preset data processing and augmentation strategy (transform list). In this task, because the file structure of the wolf-dog dataset is consistent with that of the ImageNet dataset, we specify the parameter dataset_name as ImageNet and directly use the preset ImageNet data processing and augmentation strategy. create_transforms also supports a variety of customized processing and augmentation operations, as well as automatic augmentation policies (AutoAug); a brief example is shown after the loader code below. See the API description for details.

+

We then pass the obtained transform list to create_loader(), specify batch_size and other parameters to complete the preparation of the training and validation data, and obtain the Dataset object to be used as the model input.

+
# Define and acquire data processing and augment operations
+trans_train = create_transforms(dataset_name='ImageNet', is_training=True)
+trans_val = create_transforms(dataset_name='ImageNet',is_training=False)
+
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+
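
As mentioned above, create_transforms can also apply an automatic augmentation policy via its auto_augment argument. A hedged example is shown below; the policy string 'autoaug' is an assumption, so check the API description for the exact values supported by your MindCV version:

# Optional: training transforms with an automatic augmentation policy.
+# The policy string is illustrative; see the create_transforms API description for supported values.
+trans_train_aa = create_transforms(dataset_name='ImageNet', is_training=True, auto_augment='autoaug')
+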

Dataset Visualization

+

For the Dataset object returned by the create_loader interface after data loading, we can create a data iterator through the create_tuple_iterator interface and read a batch of data with next().

+
images, labels = next(loader_train.create_tuple_iterator())
+print("Tensor of image", images.shape)
+print("Labels:", labels)
+
+
Tensor of image (16, 3, 224, 224)
+Labels: [0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1]
+
+

Visualize the acquired images and labels; each title is the label name corresponding to the image.

+
import matplotlib.pyplot as plt
+import numpy as np
+
+# class_name corresponds to label, and labels are marked in the order of folder string from small to large
+class_name = {0: "dogs", 1: "wolves"}
+
+plt.figure(figsize=(15, 7))
+for i in range(len(labels)):
+    # Get the image and its corresponding label
+    data_image = images[i].asnumpy()
+    data_label = labels[i]
+    # Process images for display
+    data_image = np.transpose(data_image, (1, 2, 0))
+    mean = np.array([0.485, 0.456, 0.406])
+    std = np.array([0.229, 0.224, 0.225])
+    data_image = std * data_image + mean
+    data_image = np.clip(data_image, 0, 1)
+    # Show Image
+    plt.subplot(3, 6, i + 1)
+    plt.imshow(data_image)
+    plt.title(class_name[int(labels[i].asnumpy())])
+    plt.axis("off")
+
+plt.show()
+
+

png

+

Model Fine-Tuning

+

1. Overall Model Fine-Tuning

+

Pretraining Model Loading

+

We use mindcv.models.densenet to define the DenseNet121 network. When the pretrained parameter in the interface is set to True, the network weights can be downloaded automatically. +Since the pretrained model was trained to classify the 1000 categories of the ImageNet dataset, we set num_classes=2 so that the output of DenseNet's classifier (the last FC layer) is adjusted to two dimensions. In this case, only the pretrained weights of the backbone are loaded, while the classifier uses its initial values.

+
from mindcv.models import create_model
+
+network = create_model(model_name='densenet121', num_classes=2, pretrained=True)
+
+
+

For the specific structure of DenseNet, see the DenseNet paper.

+
+

Model Training

+

Use the loaded and processed wolf and dog images with labels to fine-tune the DenseNet network. Note that a smaller learning rate should be used when fine-tuning the overall model.

+
from mindcv.loss import create_loss
+from mindcv.optim import create_optimizer
+from mindcv.scheduler import create_scheduler
+from mindspore import Model, LossMonitor, TimeMonitor
+
+# Define optimizer and loss function
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-4)
+loss = create_loss(name='CE')
+
+# Instantiated model
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.5195528864860535
+epoch: 1 step: 10, loss is 0.2654373049736023
+epoch: 1 step: 15, loss is 0.28758567571640015
+Train epoch time: 17270.144 ms, per step time: 1151.343 ms
+epoch: 2 step: 5, loss is 0.1807008981704712
+epoch: 2 step: 10, loss is 0.1700802594423294
+epoch: 2 step: 15, loss is 0.09752683341503143
+Train epoch time: 1372.549 ms, per step time: 91.503 ms
+epoch: 3 step: 5, loss is 0.13594701886177063
+epoch: 3 step: 10, loss is 0.03628234937787056
+epoch: 3 step: 15, loss is 0.039737217128276825
+Train epoch time: 1453.237 ms, per step time: 96.882 ms
+epoch: 4 step: 5, loss is 0.014213413000106812
+epoch: 4 step: 10, loss is 0.030747078359127045
+epoch: 4 step: 15, loss is 0.0798817127943039
+Train epoch time: 1331.237 ms, per step time: 88.749 ms
+epoch: 5 step: 5, loss is 0.009510636329650879
+epoch: 5 step: 10, loss is 0.02603740245103836
+epoch: 5 step: 15, loss is 0.051846928894519806
+Train epoch time: 1312.737 ms, per step time: 87.516 ms
+epoch: 6 step: 5, loss is 0.1163717582821846
+epoch: 6 step: 10, loss is 0.02439398318529129
+epoch: 6 step: 15, loss is 0.02564268559217453
+Train epoch time: 1434.704 ms, per step time: 95.647 ms
+epoch: 7 step: 5, loss is 0.013310655951499939
+epoch: 7 step: 10, loss is 0.02289542555809021
+epoch: 7 step: 15, loss is 0.1992517113685608
+Train epoch time: 1275.935 ms, per step time: 85.062 ms
+epoch: 8 step: 5, loss is 0.015928998589515686
+epoch: 8 step: 10, loss is 0.011409260332584381
+epoch: 8 step: 15, loss is 0.008141174912452698
+Train epoch time: 1323.102 ms, per step time: 88.207 ms
+epoch: 9 step: 5, loss is 0.10395607352256775
+epoch: 9 step: 10, loss is 0.23055407404899597
+epoch: 9 step: 15, loss is 0.04896317049860954
+Train epoch time: 1261.067 ms, per step time: 84.071 ms
+epoch: 10 step: 5, loss is 0.03162381425499916
+epoch: 10 step: 10, loss is 0.13094250857830048
+epoch: 10 step: 15, loss is 0.020028553903102875
+Train epoch time: 1217.958 ms, per step time: 81.197 ms
+
+

Model Evaluation

+

After the training, we evaluate the accuracy of the model on the validation set.

+
res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

Visual Model Inference Results

+

Define the visualize_model function and visualize the model predictions.

+
import matplotlib.pyplot as plt
+import mindspore as ms
+
+def visualize_model(model, val_dl, num_classes=2):
+    # Load the data of the validation set for validation
+    images, labels= next(val_dl.create_tuple_iterator())
+    # Predict image class
+    output = model.predict(images)
+    pred = np.argmax(output.asnumpy(), axis=1)
+    # Display images and their predicted values
+    images = images.asnumpy()
+    labels = labels.asnumpy()
+    class_name = {0: "dogs", 1: "wolves"}
+    plt.figure(figsize=(15, 7))
+    for i in range(len(labels)):
+        plt.subplot(3, 6, i + 1)
+        # If the prediction is correct, it is displayed in blue; If the prediction is wrong, it is displayed in red
+        color = 'blue' if pred[i] == labels[i] else 'red'
+        plt.title('predict:{}'.format(class_name[pred[i]]), color=color)
+        picture_show = np.transpose(images[i], (1, 2, 0))
+        mean = np.array([0.485, 0.456, 0.406])
+        std = np.array([0.229, 0.224, 0.225])
+        picture_show = std * picture_show + mean
+        picture_show = np.clip(picture_show, 0, 1)
+        plt.imshow(picture_show)
+        plt.axis('off')
+
+    plt.show()
+
+

Use the fine-tuned model to predict the wolf and dog images in the validation set. If the predicted label is shown in blue, the prediction is correct; if it is shown in red, the prediction is wrong.

+
visualize_model(model, loader_val)
+
+

png

+

2. Freeze Backbone and Fine-Tune the Classifier

+

Freezing Backbone Parameters

+

First, we need to freeze all network layers except the final classifier layer, that is, set the requires_grad attribute of the corresponding layer parameters to False, so that their gradients are not calculated and they are not updated during backpropagation.

+

Because all models in mindcv.models name their classification head classifier (i.e., the final Dense layer), we can use the names classifier.weight and classifier.bias to filter out the parameters of every other layer and set their requires_grad attribute to False.

+
# freeze backbone
+for param in network.get_parameters():
+    if param.name not in ["classifier.weight", "classifier.bias"]:
+        param.requires_grad = False
+
+
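
As a quick sanity check (not part of the original tutorial), you can list the parameters that are still trainable; after the loop above, only the classifier weight and bias should remain:

# Sanity check: only the classifier parameters should still require gradients.
+print([param.name for param in network.trainable_params()])
+# Expected output (assumption): ['classifier.weight', 'classifier.bias']
+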

Fine-Tune Classifier

+

Because the feature network has been fixed, we don't have to worry about distorting the pretrained features in the training process. Therefore, compared with the first method, we can increase the learning rate.

+

Compared with fine-tuning the whole model, this saves more than half of the training time, because gradients for the frozen layers no longer need to be computed.

+
# dataset load
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+
+# Define optimizer and loss function
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-3)
+loss = create_loss(name='CE')
+
+# Instantiated model
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.051333948969841
+epoch: 1 step: 10, loss is 0.02043312042951584
+epoch: 1 step: 15, loss is 0.16161368787288666
+Train epoch time: 10228.601 ms, per step time: 681.907 ms
+epoch: 2 step: 5, loss is 0.002121545374393463
+epoch: 2 step: 10, loss is 0.0009798109531402588
+epoch: 2 step: 15, loss is 0.015776708722114563
+Train epoch time: 562.543 ms, per step time: 37.503 ms
+epoch: 3 step: 5, loss is 0.008056879043579102
+epoch: 3 step: 10, loss is 0.0009347647428512573
+epoch: 3 step: 15, loss is 0.028648357838392258
+Train epoch time: 523.249 ms, per step time: 34.883 ms
+epoch: 4 step: 5, loss is 0.001014217734336853
+epoch: 4 step: 10, loss is 0.0003159046173095703
+epoch: 4 step: 15, loss is 0.0007699579000473022
+Train epoch time: 508.886 ms, per step time: 33.926 ms
+epoch: 5 step: 5, loss is 0.0015687644481658936
+epoch: 5 step: 10, loss is 0.012090332806110382
+epoch: 5 step: 15, loss is 0.004598274827003479
+Train epoch time: 507.243 ms, per step time: 33.816 ms
+epoch: 6 step: 5, loss is 0.010022152215242386
+epoch: 6 step: 10, loss is 0.0066385045647621155
+epoch: 6 step: 15, loss is 0.0036080628633499146
+Train epoch time: 517.646 ms, per step time: 34.510 ms
+epoch: 7 step: 5, loss is 0.01344013586640358
+epoch: 7 step: 10, loss is 0.0008538365364074707
+epoch: 7 step: 15, loss is 0.14135593175888062
+Train epoch time: 511.513 ms, per step time: 34.101 ms
+epoch: 8 step: 5, loss is 0.01626245677471161
+epoch: 8 step: 10, loss is 0.02871556021273136
+epoch: 8 step: 15, loss is 0.010110966861248016
+Train epoch time: 545.678 ms, per step time: 36.379 ms
+epoch: 9 step: 5, loss is 0.008498094975948334
+epoch: 9 step: 10, loss is 0.2588501274585724
+epoch: 9 step: 15, loss is 0.0014278888702392578
+Train epoch time: 499.243 ms, per step time: 33.283 ms
+epoch: 10 step: 5, loss is 0.021337147802114487
+epoch: 10 step: 10, loss is 0.00829876959323883
+epoch: 10 step: 15, loss is 0.008352771401405334
+Train epoch time: 465.600 ms, per step time: 31.040 ms
+
+

Model Evaluation

+

After the training, we evaluate the accuracy of the model on the validation set.

+
dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

Visual Model Prediction

+

Use the fine-tuned model to predict the wolf and dog images in the validation set. If the predicted label is shown in blue, the prediction is correct; if it is shown in red, the prediction is wrong.

+
visualize_model(model, loader_val)
+
+

png

+

The prediction results of wolf/dog after fine-tuning are correct.

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/inference/index.html b/tutorials/inference/index.html new file mode 100644 index 000000000..3f7ae2455 --- /dev/null +++ b/tutorials/inference/index.html @@ -0,0 +1,1288 @@ + + + + + + + + + + + + + + + + + + + + + + + + Inference - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Image Classification Prediction

+

Download Notebook

+

This tutorial introduces how to call a pretrained model in MindCV to make classification predictions on test images.

+

Model Loading

+

View All Available Models

+

By calling the registry.list_models function in mindcv.models, the names of all network models can be printed. Variants of a network with different parameter configurations are also listed separately, such as resnet18 / resnet34 / resnet50 / resnet101 / resnet152.

+
import sys
+sys.path.append("..")
+from mindcv.models import registry
+registry.list_models()
+
+
['BiT_resnet50',
+ 'repmlp_b224',
+ 'repmlp_b256',
+ 'repmlp_d256',
+ 'repmlp_l256',
+ 'repmlp_t224',
+ 'repmlp_t256',
+ 'convit_base',
+ 'convit_base_plus',
+ 'convit_small',
+ ...
+ 'visformer_small',
+ 'visformer_small_v2',
+ 'visformer_tiny',
+ 'visformer_tiny_v2',
+ 'vit_b_16_224',
+ 'vit_b_16_384',
+ 'vit_b_32_224',
+ 'vit_b_32_384',
+ 'vit_l_16_224',
+ 'vit_l_16_384',
+ 'vit_l_32_224',
+ 'xception']
+
+
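
If your MindCV version supports it, list_models may also accept a wildcard filter so you do not have to scan the whole list; the filter argument below is an assumption, so fall back to the plain call above if it raises an error:

# Assumption: list_models accepts a wildcard filter string, similar to timm.
+registry.list_models("resnet*")
+# e.g. ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', ...]
+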

Load Pretraining Model

+

Taking the resnet50 model as an example, we introduce two methods to load the model checkpoint using the create_model function in mindcv.models.

+

1). When the pretrained parameter in the interface is set to True, network weights can be automatically downloaded.

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, pretrained=True)
+# Switch the execution logic of the network to the inference scenario
+model.set_train(False)
+
+
102453248B [00:16, 6092186.31B/s]
+
+ResNet<
+  (conv1): Conv2d<input_channels=3, output_channels=64, kernel_size=(7, 7), stride=(2, 2), pad_mode=pad, padding=3, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>
+  (bn1): BatchNorm2d<num_features=64, eps=1e-05, momentum=0.9, gamma=Parameter (name=bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True), beta=Parameter (name=bn1.beta, shape=(64,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=bn1.moving_mean, shape=(64,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=bn1.moving_variance, shape=(64,), dtype=Float32, requires_grad=False)>
+  (relu): ReLU<>
+  (max_pool): MaxPool2d<kernel_size=3, stride=2, pad_mode=SAME>
+  ...
+  (pool): GlobalAvgPooling<>
+  (classifier): Dense<input_channels=2048, output_channels=1000, has_bias=True>
+  >
+
+

2). When the checkpoint_path parameter in the interface is set to a file path, the model parameter file with the .ckpt suffix can be loaded from the local path.

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, checkpoint_path='./resnet50_224.ckpt')
+# Switch the execution logic of the network to the inference scenario
+model.set_train(False)
+
+

Data Preparation

+

Create Dataset

+

Here, we download a Wikipedia image as a test image, and use the create_dataset function in mindcv.data to construct a custom dataset for a single image.

+
from mindcv.data import create_dataset
+num_workers = 1
+# path of dataset
+data_dir = "./data/"
+dataset = create_dataset(root=data_dir, split='test', num_parallel_workers=num_workers)
+# Image visualization
+from PIL import Image
+Image.open("./data/test/dog/dog.jpg")
+
+
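For reference, create_dataset with split='test' reads an ImageFolder-style layout under the given root, so the single test image above is assumed to be stored as follows:

# ./data/
+# └── test/           <- the split passed to create_dataset
+#     └── dog/        <- one folder per class
+#         └── dog.jpg <- the downloaded test image
+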

(output: the downloaded test image of a dog)

+

Data Preprocessing

+

Call the create_transforms function to obtain the data processing strategy (transform list) of the ImageNet dataset used by the pre-trained model.

+

We pass the obtained transform list into the create_loader function, specify batch_size=1 and other parameters, and complete the preparation of the test data. The returned Dataset object is then used as the input of the model.

+
from mindcv.data import create_transforms, create_loader
+transforms_list = create_transforms(dataset_name='imagenet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+
+

Model Inference

+

The image from the custom dataset is fed into the model to obtain the inference result. Here, the Squeeze operator from mindspore.ops is used to remove the batch dimension.

+
import mindspore.ops as P
+import numpy as np
+images, _ = next(data_loader.create_tuple_iterator())
+output = P.Squeeze()(model(images))
+pred = np.argmax(output.asnumpy())
+
+
with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+print('predict: {}'.format(idx2label[pred]))
+
+
predict: Labrador retriever
+
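If you also want to see how confident the model is, the logits can be turned into probabilities and the five most likely classes listed. This is a minimal sketch reusing the output and idx2label objects from above; the softmax/top-5 handling is an addition, not part of the original notebook:

probs = P.Softmax()(output).asnumpy()
+top5 = probs.argsort()[-5:][::-1]
+for i in top5:
+    print('{:<30} {:.4f}'.format(idx2label[i], probs[i]))
+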
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/tutorials/quick_start/index.html b/tutorials/quick_start/index.html new file mode 100644 index 000000000..4438de056 --- /dev/null +++ b/tutorials/quick_start/index.html @@ -0,0 +1,1419 @@ + + + + + + + + + + + + + + + + + + + + + + + + Quick Start - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Quick Start

+

Download Notebook

+

MindCV is an open-source toolbox for computer vision research and development based on MindSpore. +It collects a series of classic and SoTA vision models, such as ResNet and SwinTransformer, along with their pretrained weights. +SoTA methods such as AutoAugment are also provided for performance improvement. +With the decoupled module design, it is easy to apply or adapt MindCV to your own CV tasks. +In this tutorial, we will provide a quick start guideline for MindCV.

+

This tutorial takes the DenseNet classification model as an example to implement transfer learning on the CIFAR-10 dataset and explains the usage of MindCV modules in the process.

+

Environment Setting

+

See Installation for details.

+

Data

+

Dataset

+

Through the create_dataset function in mindcv.data, we can quickly load standard datasets or customized datasets.

+
import os
+from mindcv.data import create_dataset, create_transforms, create_loader
+
+cifar10_dir = './datasets/cifar/cifar-10-batches-bin'  # your dataset path
+num_classes = 10  # num of classes
+num_workers = 8  # num of parallel workers
+
+# create dataset
+dataset_train = create_dataset(
+    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers
+)
+
+

Transform

+

Through the create_transforms function, you can directly obtain the appropriate data processing and augmentation strategies (transform list) for standard datasets, including common data processing strategies for CIFAR-10 and ImageNet.

+
# create transforms
+trans = create_transforms(dataset_name='cifar10', image_resize=224)
+
+

Loader

+

The mindcv.data.create_loader function performs data transformation and batched loading. We need to pass in the transform list returned by create_transforms.

+
# Perform data augmentation operations to generate the required dataset.
+loader_train = create_loader(dataset=dataset_train,
+                             batch_size=64,
+                             is_training=True,
+                             num_classes=num_classes,
+                             transform=trans,
+                             num_parallel_workers=num_workers)
+
+num_batches = loader_train.get_dataset_size()
+
+
+

Avoid executing the create_loader cell repeatedly in a notebook; if you need to run it again, re-execute the create_dataset cell first.

+
+

Model

+

Use the create_model interface to obtain an instantiated DenseNet and load the pretrained weights (trained on the ImageNet dataset).

+
from mindcv.models import create_model
+
+# instantiate the DenseNet121 model and load the pretrained weights.
+network = create_model(model_name='densenet121', num_classes=num_classes, pretrained=True)
+
+
+

Because CIFAR-10 and ImageNet have different numbers of classes, the classifier parameters cannot be shared. The resulting warning that the classifier parameters cannot be loaded does not affect fine-tuning.

+
+
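If you prefer to fine-tune only the classification head while keeping the pretrained backbone fixed, the other parameters can be frozen before building the optimizer. This is an optional sketch; it assumes the head parameters carry the classifier prefix used by MindCV models:

# optionally freeze the backbone and train only the classification head
+for param in network.get_parameters():
+    if not param.name.startswith("classifier"):
+        param.requires_grad = False
+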

Loss

+

Obtain the loss function through the create_loss interface.

+
from mindcv.loss import create_loss
+
+loss = create_loss(name='CE')
+
+
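create_loss also exposes common options such as label smoothing. The variant below is a hedged example; verify the argument name against the create_loss signature of your installed version:

# cross-entropy with label smoothing (optional)
+loss = create_loss(name='CE', label_smoothing=0.1)
+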

Learning Rate Scheduler

+

Use the create_scheduler interface to set the learning rate scheduler.

+
from mindcv.scheduler import create_scheduler
+
+# learning rate scheduler
+lr_scheduler = create_scheduler(steps_per_epoch=num_batches,
+                                scheduler='constant',
+                                lr=0.0001)
+
+
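Besides the constant scheduler, the factory also supports decaying schedules. The cosine-decay setup below is an illustrative sketch; the argument names follow the usual create_scheduler signature and should be checked against your installed version:

# example: cosine decay over the 5 training epochs used below
+lr_scheduler = create_scheduler(steps_per_epoch=num_batches,
+                                scheduler='cosine_decay',
+                                lr=0.0001,
+                                min_lr=1e-6,
+                                num_epochs=5)
+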

Optimizer

+

Use the create_optimizer interface to create an optimizer.

+
from mindcv.optim import create_optimizer
+
+# create optimizer
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler)
+
+
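The optimizer factory also accepts the usual regularization options. Adding a small weight decay, for example, is a hedged variant of the call above; check the parameter name in your version of create_optimizer:

# Adam with a small weight decay (optional)
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler, weight_decay=1e-5)
+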

Training

+

Use the mindspore.Model interface to encapsulate a trainable instance according to the parameters passed in by the user.

+
from mindspore import Model
+
+# Encapsulate an instance that can be trained or used for inference
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+

Use the mindspore.Model.train interface for model training.

+
from mindspore import LossMonitor, TimeMonitor, CheckpointConfig, ModelCheckpoint
+
+# Set the callback function for saving network parameters during training.
+ckpt_save_dir = './ckpt'
+ckpt_config = CheckpointConfig(save_checkpoint_steps=num_batches)
+ckpt_cb = ModelCheckpoint(prefix='densenet121-cifar10',
+                          directory=ckpt_save_dir,
+                          config=ckpt_config)
+
+model.train(5, loader_train, callbacks=[LossMonitor(num_batches//5), TimeMonitor(num_batches//5), ckpt_cb], dataset_sink_mode=False)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:04:30.001.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op5273] don't support int64, reduce precision from int64 to int32.
+
+
+epoch: 1 step: 156, loss is 2.0816354751586914
+epoch: 1 step: 312, loss is 1.4474115371704102
+epoch: 1 step: 468, loss is 0.8935483694076538
+epoch: 1 step: 624, loss is 0.5588696002960205
+epoch: 1 step: 780, loss is 0.3161369860172272
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:09:20.261.851 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op16720] don't support int64, reduce precision from int64 to int32.
+
+
+Train epoch time: 416429.509 ms, per step time: 532.519 ms
+epoch: 2 step: 154, loss is 0.19752007722854614
+epoch: 2 step: 310, loss is 0.14635677635669708
+epoch: 2 step: 466, loss is 0.3511860966682434
+epoch: 2 step: 622, loss is 0.12542471289634705
+epoch: 2 step: 778, loss is 0.22351759672164917
+Train epoch time: 156746.872 ms, per step time: 200.444 ms
+epoch: 3 step: 152, loss is 0.08965137600898743
+epoch: 3 step: 308, loss is 0.22765043377876282
+epoch: 3 step: 464, loss is 0.19035443663597107
+epoch: 3 step: 620, loss is 0.06591956317424774
+epoch: 3 step: 776, loss is 0.0934530645608902
+Train epoch time: 156574.210 ms, per step time: 200.223 ms
+epoch: 4 step: 150, loss is 0.03782692924141884
+epoch: 4 step: 306, loss is 0.023876197636127472
+epoch: 4 step: 462, loss is 0.038690414279699326
+epoch: 4 step: 618, loss is 0.15388774871826172
+epoch: 4 step: 774, loss is 0.1581358164548874
+Train epoch time: 158398.108 ms, per step time: 202.555 ms
+epoch: 5 step: 148, loss is 0.06556802988052368
+epoch: 5 step: 304, loss is 0.006707251071929932
+epoch: 5 step: 460, loss is 0.02353120595216751
+epoch: 5 step: 616, loss is 0.014183484017848969
+epoch: 5 step: 772, loss is 0.09367241710424423
+Train epoch time: 154978.618 ms, per step time: 198.182 ms
+
+

Evaluation

+

Now, let's evaluate the trained model on the validation set of CIFAR-10.

+
# Load validation dataset
+dataset_val = create_dataset(
+    name='cifar10', root=cifar10_dir, split='test', shuffle=True, num_parallel_workers=num_workers
+)
+
+# Perform data augmentation operations to generate the required dataset.
+loader_val = create_loader(dataset=dataset_val,
+                           batch_size=64,
+                           is_training=False,
+                           num_classes=num_classes,
+                           transform=trans,
+                           num_parallel_workers=num_workers)
+
+

Load the fine-tuned parameter file (densenet121-cifar10-5_782.ckpt) into the model.

+

Encapsulate an inference-capable instance according to the parameters passed in by the user, load the validation dataset, and verify the accuracy of the fine-tuned DenseNet121 model.

+
# Verify the accuracy of DenseNet121 after fine-tuning
+acc = model.eval(loader_val, dataset_sink_mode=False)
+print(acc)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:24:11.927.472 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op24314] don't support int64, reduce precision from int64 to int32.
+
+
+{'accuracy': 0.951}
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:25:01.871.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op27139] don't support int64, reduce precision from int64 to int32.
+
+

Use YAML files for model training and validation

+

We can also train and validate a model quickly through the train.py and validate.py scripts, using a yaml file in which the model parameters are already set. The following is an example of training SqueezeNetV1.0 on ImageNet (you need to download ImageNet to the data directory in advance).

+
+

For detailed tutorials, please refer to the tutorial.

+
+
# standalone training on a CPU/GPU/Ascend device
+python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --distribute False
+
+
python validate.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --ckpt_path /path/to/ckpt
+
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/__pycache__/gen_ref_pages.cpython-38.pyc b/zh/__pycache__/gen_ref_pages.cpython-38.pyc new file mode 100644 index 000000000..fa205b2c7 Binary files /dev/null and b/zh/__pycache__/gen_ref_pages.cpython-38.pyc differ diff --git a/zh/gen_ref_pages.py b/zh/gen_ref_pages.py new file mode 100644 index 000000000..9eeb4f9d3 --- /dev/null +++ b/zh/gen_ref_pages.py @@ -0,0 +1,59 @@ +"""Generate the code reference pages of models.""" +import os +import sys + +sys.path.append(".") + +import importlib +import logging +from pathlib import Path + +_logger = logging.getLogger('mkdocs') +_langs = ["en", "zh"] + + +def _gen_page(lang): + full_doc_path = Path(f"docs/{lang}/reference/models.md") + _logger.info(f"Generating reference page: {full_doc_path}") + with open(full_doc_path, "w") as fd: + print("# Models", file=fd) + print("\n\n## Create Model", file=fd) + print("\n### ::: mindcv.models.model_factory.create_model", file=fd) + + for path in sorted(Path("mindcv/models").rglob("*.py")): + module_path = path.with_suffix("") # eg: mindcv/models/resnet + parts = list(module_path.parts) # eg: ["mindcv", "models", "resnet"] + if parts[-1].startswith("__") or parts[-2] == "layers": + continue + # fileter out utility modules + if parts[-1] in ["model_factory", "registry", "utils", "helpers"]: + continue + # filter out the net module which is replaced by the net function with the same name + # TODO: we need to change mechanism of model importing + if parts[-1] in ["googlenet", "inception_v3", "inception_v4", "xception", "pnasnet"]: + continue + + try: + print(f"\n\n## {parts[-1]}", file=fd) + identifier = ".".join(parts) # eg: mindcv.models.resnet + mod = importlib.import_module(identifier) + for mem in sorted(set(mod.__all__)): + print(f"\n### ::: {identifier}.{mem}", file=fd) + except Exception as err: + _logger.warning(f"Cannot generate reference of {identifier}, error: {err}.") + + +def _del_page(lang): + full_doc_path = Path(f"docs/{lang}/reference/models.md") + _logger.info(f"Cleaning generated reference page: {full_doc_path}") + os.remove(full_doc_path) + + +def on_startup(command, dirty): + for lang in _langs: + _gen_page(lang) + + +def on_shutdown(): + for lang in _langs: + _del_page(lang) diff --git a/zh/how_to_guides/write_a_new_model/index.html b/zh/how_to_guides/write_a_new_model/index.html new file mode 100644 index 000000000..c0d5c20fd --- /dev/null +++ b/zh/how_to_guides/write_a_new_model/index.html @@ -0,0 +1,1396 @@ + + + + + + + + + + + + + + + + + + + + + + + + Write A New Model - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

模型编写指南

+

本文档提供了编写MindSpore套件中的模型定义文件model.py的参考模板,旨在提供一种统一的代码风格。

+

接下来我们以相对简单的新模型MLP-Mixer作为示例。

+

文件头

+

该文件的**简要描述**。包含模型名称和论文题目。如下所示:

+
"""
+MindSpore implementation of `${MODEL_NAME}`.
+Refer to ${PAPER_NAME}.
+"""
+
+

模块导入

+

模块导入分为三种类型。分别为

+
    +
  • Python原生或第三方库。如import mathimport numpy as np等等。应当放在第一梯队。
  • +
  • MindSpore相关模块。如import mindspore.nn as nnimport mindspore.ops as ops等等。应当放在第二梯队。
  • +
  • 套件包内模块。如from .layers.classifier import ClassifierHead等等。应当放在第三梯队,并使用相对导入。
  • +
+

示例如下:

+
import math
+from collections import OrderedDict
+
+import mindspore.nn as nn
+import mindspore.ops as ops
+import mindspore.common.initializer as init
+
+from .utils import load_pretrained
+from .layers.classifier import ClassifierHead
+
+

仅导入必须的模块或包,避免导入无用包。

+

__all__

+
+

Python 没有原生的可见性控制,其可见性的维护是靠一套需要大家自觉遵守的“约定”。__all__ 是针对模块公开接口的一种约定,以提供“白名单”的形式暴露接口。如果定义了__all__,其他文件中使用from xxx import *导入该文件时,只会导入__all__列出的成员,而其他成员都被排除在外。

+
+

我们约定模型中对外暴露的接口包括主模型类以及返回不同规格模型的函数,例如:

+
__all__ = [
+    "MLPMixer",
+    "mlp_mixer_s_p32",
+    "mlp_mixer_s_p16",
+    ...
+]
+
+

其中"MLPMixer"是主模型类,"mlp_mixer_s_p32""mlp_mixer_s_p16"等是返回不同规格模型的函数。一般来说子模型,即某Layer或某Block是不应该被其他文件所共用的。如若此,应当考虑将该子模型提取到${MINDCLS}/models/layers下面作为公用模块,如SEBlock等。

+

子模型

+

我们都知道一个深度模型是由多层组成的网络。其中某些层可以组成相同拓扑结构的子模型,我们一般称其为Layer或者Block,例如ResidualBlock等。这种抽象有利于我们理解整个模型结构,也有利于代码的编写。

+

我们应当通过类注释对子模型进行功能的简要描述。在MindSpore中,模型的类继承于nn.Cell,一般来说我们需要重载以下两个函数:

+
    +
  • __init__函数中,我们应当定义模型中需要用到的神经网络层(__init__中的参数要进行参数类型声明,即type hint)。
  • +
  • construct函数中我们定义模型前向逻辑。
  • +
+

示例如下:

+
class MixerBlock(nn.Cell):
+    """Mixer Layer with token-mixing MLP and channel-mixing MLP"""
+
+    def __init__(self,
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 dropout: float = 0.
+                 ) -> None:
+        super().__init__()
+        self.token_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            TransPose((0, 2, 1)),
+            FeedForward(n_patches, token_dim, dropout),
+            TransPose((0, 2, 1))
+        )
+        self.channel_mix = nn.SequentialCell(
+            nn.LayerNorm((n_channels,)),
+            FeedForward(n_channels, channel_dim, dropout),
+        )
+
+    def construct(self, x):
+        x = x + self.token_mix(x)
+        x = x + self.channel_mix(x)
+        return x
+
+

nn.Cell类的编写过程中,有两个值得注意的方面

+
    +
  • +

    CellList & SequentialCell

    +
  • +
  • +

    CellList is just a container that contains a list of neural network layers(Cell). The Cells contained by it can be properly registered, and will be visible by all Cell methods. We must overwrite the forward calculation, that is, the construct function.

    +
  • +
  • +

    SequentialCell is a container that holds a sequential list of layers(Cell). The Cells may have a name(OrderedDict) or not(List). We don't need to implement forward computation, which is done according to the order of the sequential list.

    +
  • +
  • +

    construct

    +
  • +
  • +

    Assert is not supported. [RuntimeError: ParseStatement] Unsupported statement 'Assert'.

    +
  • +
  • +

    Usage of single operator。调用算子时(如concat, reshape, mean),使用函数式接口 mindspore.ops.functional (如 output=ops.concat((x1, x2))),避免先在__init__中实例化原始算子 ops.Primitive (如self.concat=ops.Concat()) 再在construct中调用(output=self.concat((x1, x2)))。

    +
  • +
+

主模型

+

主模型是论文中所提出的网络模型定义,由多个子模型堆叠而成。它是适用于分类、检测等任务的最顶层网络。它在代码书写上与子模型上基本类似,但有几处不同。

+
    +
  • 类注释。我们应当在此给出论文的题目和链接。另外由于该类对外暴露,我们最好也加上类初始化参数的说明。详见下方代码。
  • +
  • forward_features函数。在函数内对模型的特征网络的运算定义。
  • +
  • forward_head函数。在函数内对模型的分类器的运算进行定义。
  • +
  • construct函数。在函数调用特征网络和分类器的运算。
  • +
  • _initialize_weights函数。我们约定模型参数的随机初始化由该成员函数完成。详见下方代码。
  • +
+

示例如下:

+
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (Union[int, tuple]) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        in_channels(int): number the channels of the input. Default: 3.
+        n_classes (int) : number of classification classes. Default: 1000.
+    """
+
+    def __init__(self,
+                 depth: int,
+                 patch_size: Union[int, tuple],
+                 n_patches: int,
+                 n_channels: int,
+                 token_dim: int,
+                 channel_dim: int,
+                 in_channels: int = 3,
+                 n_classes: int = 1000,
+                 ) -> None:
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, n_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        return ops.mean(x, 1)
+
+    def forward_head(self, x: Tensor)-> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Conv2d):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                if m.beta is not None:
+                    m.beta.set_data(init.initializer(init.Constant(0.0001), m.beta.shape))
+            elif isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.Normal(0.01, 0), m.weight.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+
+

规格函数

+

论文中所提出的模型可能有不同规格的变种,如channel的大小、depth的大小等等。这些变种的具体配置应该通过规格函数体现,规格的接口参数: pretrained, num_classes, in_channels 命名要统一,同时在规格函数内还要进行pretrain loading操作。每一个规格函数对应一种确定配置的规格变种。配置通过入参传入主模型类的定义,并返回实例化的主模型类。另外,还需通过添加装饰器@register_model将该模型的此规格注册到包内。

+

示例如下:

+
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds,
+                    channel_dim=dc, in_channels=in_channels, n_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+

验证main(可选)

+

初始编写阶段应当保证模型是可运行的。可通过下述代码块进行基础验证:

+
if __name__ == '__main__':
+    import numpy as np
+    import mindspore
+    from mindspore import Tensor
+
+    model = mlp_mixer_s_p16()
+    print(model)
+    dummy_input = Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)
+    y = model(dummy_input)
+    print(y.shape)
+
+

参考示例

+
    +
  • densenet.py
  • +
  • shufflenetv1.py
  • +
  • shufflenetv2.py
  • +
  • mixnet.py
  • +
  • mlp_mixer.py
  • +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/index.html b/zh/index.html new file mode 100644 index 000000000..e24168ee4 --- /dev/null +++ b/zh/index.html @@ -0,0 +1,1496 @@ + + + + + + + + + + + + + + + + + + + + + + 主页 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +
+

MindCV

+

CI +PyPI - Python Version +PyPI +docs +license +open issues +PRs +Code style: black +Imports: isort +pre-commit

+
+

简介

+

MindCV是一个基于 MindSpore 开发的,致力于计算机视觉相关技术研发的开源工具箱。它提供大量的计算机视觉领域的经典模型和SoTA模型以及它们的预训练权重和训练策略。同时,还提供了自动增强等SoTA算法来提高模型性能。通过解耦的模块设计,您可以轻松地将MindCV应用到您自己的CV任务中。

+

主要特性

+
    +
  • +

    高易用性 MindCV将视觉任务分解为各种可配置的组件,用户可以轻松地构建自己的数据处理和模型训练流程。

    +
    >>> import mindcv
    +# 创建数据集
    +>>> dataset = mindcv.create_dataset('cifar10', download=True)
    +# 创建模型
    +>>> network = mindcv.create_model('resnet50', pretrained=True)
    +
    +

    用户可通过预定义的训练和微调脚本,快速配置并完成训练或迁移学习任务。

    +
    # 配置和启动迁移学习任务
    +python train.py --model swin_tiny --pretrained --opt=adamw --lr=0.001 --data_dir=/path/to/dataset
    +
    +
  • +
  • +

    高性能 MindCV集成了大量基于CNN和Transformer的高性能模型,如SwinTransformer,并提供预训练权重、训练策略和性能报告,帮助用户快速选型并将其应用于视觉模型。

    +
  • +
  • +

    灵活高效 MindCV基于高效的深度学习框架MindSpore开发,具有自动并行和自动微分等特性,支持不同硬件平台上(CPU/GPU/Ascend),同时支持效率优化的静态图模式和调试灵活的动态图模式。

    +
  • +
+

模型支持

+

基于MindCV进行模型实现和重训练的汇总结果详见模型仓库, 所用到的训练策略和训练后的模型权重均可通过表中链接获取。

+

各模型讲解和训练说明详见configs目录。

+

安装

+

详情请见安装页面。

+

快速入门

+

上手教程

+

在开始上手MindCV前,可以阅读MindCV的快速开始,该教程可以帮助用户快速了解MindCV的各个重要组件以及训练、验证、测试流程。

+

以下是一些供您快速体验的代码样例。

+
>>> import mindcv
+# 列出满足条件的预训练模型名称
+>>> mindcv.list_models("swin*", pretrained=True)
+['swin_tiny']
+# 创建模型
+>>> network = mindcv.create_model('swin_tiny', pretrained=True)
+# 验证模型的准确率
+>>> !python validate.py --model=swin_tiny --pretrained --dataset=imagenet --val_split=validation
+{'Top_1_Accuracy': 0.80824, 'Top_5_Accuracy': 0.94802, 'loss': 1.7331367141008378}
+
+
+图片分类示例 +

右键点击如下图片,另存为dog.jpg

+

+ +

+

使用加载了预训练参数的SoTA模型对图片进行推理。

+
>>> !python infer.py --model=swin_tiny --image_path='./dog.jpg'
+{'Labrador retriever': 0.5700152, 'golden retriever': 0.034551315, 'kelpie': 0.010108651, 'Chesapeake Bay retriever': 0.008229004, 'Walker hound, Walker foxhound': 0.007791956}
+
+

预测结果排名前1的是拉布拉多犬,正是这张图片里的狗狗的品种。

+
+

模型训练

+

通过train.py,用户可以很容易地在标准数据集或自定义数据集上训练模型,用户可以通过外部变量或者yaml配置文件来设置训练策略(如数据增强、学习率策略)。

+
    +
  • +

    单卡训练

    +
    # 单卡训练
    +python train.py --model resnet50 --dataset cifar10 --dataset_download
    +
    +

    以上代码是在CIFAR10数据集上单卡(CPU/GPU/Ascend)训练ResNet的示例,通过modeldataset参数分别指定需要训练的模型和数据集。

    +
  • +
  • +

    分布式训练

    +

    对于像ImageNet这样的大型数据集,有必要在多个设备上以分布式模式进行训练。基于MindSpore对分布式相关功能的良好支持,用户可以使用mpirun来进行模型的分布式训练。

    +
    # 分布式训练
    +# 假设你有4张GPU或者NPU卡
    +mpirun --allow-run-as-root -n 4 python train.py --distribute \
    +    --model densenet121 --dataset imagenet --data_dir ./datasets/imagenet
    +
    +

    完整的参数列表及说明在config.py中定义,可运行python train.py --help快速查看。

    +

    如需恢复训练,请指定--ckpt_path--ckpt_save_dir参数,脚本将加载路径中的模型权重和优化器状态,并恢复中断的训练进程。

    +
  • +
  • +

    超参配置和预训练策略

    +

    您可以编写yaml文件或设置外部参数来指定配置数据、模型、优化器等组件及其超参。以下是使用预设的训练策略(yaml文件)进行模型训练的示例。

    +
    mpirun --allow-run-as-root -n 4 python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml
    +
    +
    +

    预定义的训练策略

    +

    MindCV目前提供了超过20种模型训练策略,在ImageNet取得SoTA性能。 +具体的参数配置和详细精度性能汇总请见configs文件夹。 +您可以便捷地将这些训练策略用于您的模型训练中以提高性能(复用或修改相应的yaml文件即可)。

    +
    +
  • +
  • +

    在ModelArts/OpenI平台上训练

    +

    ModelArtsOpenI云平台上进行训练,需要执行以下操作:

    +
    1、在云平台上创建新的训练任务。
    +2、在网站UI界面添加运行参数`config`,并指定yaml配置文件的路径。
    +3、在网站UI界面添加运行参数`enable_modelarts`并设置为True。
    +4、在网站上填写其他训练信息并启动训练任务。
    +
    +
  • +
+
+

静态图和动态图模式

+

在默认情况下,模型训练(train.py)在MindSpore上以图模式 运行,该模式对使用静态图编译对性能和并行计算进行了优化。 +相比之下,pynative模式的优势在于灵活性和易于调试。为了方便调试,您可以将参数--mode设为1以将运行模式设置为调试模式。

+
+
+

混合模式

+

基于mindspore.jit的混合模式 是兼顾了MindSpore的效率和灵活的混合模式。用户可通过使用train_with_func.py文件来使用该混合模式进行训练。

+
python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10
+
+
+

注:此为试验性质的训练脚本,仍在改进,在MindSpore 1.8.1或更早版本上使用此模式目前并不稳定。

+
+
+

模型验证

+

使用validate.py可以便捷地验证训练好的模型。

+
# 验证模型
+python validate.py --model=resnet50 --dataset=imagenet --data_dir=/path/to/data --ckpt_path=/path/to/model.ckpt
+
+
+

训练过程中进行验证

+

当需要在训练过程中,跟踪模型在测试集上精度的变化时,请启用参数--val_while_train,如下

+
python train.py --model=resnet50 --dataset=cifar10 \
+    --val_while_train --val_split=test --val_interval=1
+
+

各轮次的训练损失和测试精度将保存在{ckpt_save_dir}/results.log中。

+

更多训练和验证的示例请见示例

+
+

教程

+

我们提供了系列教程,帮助用户学习如何使用MindCV.

+ +

支持算法

+
+ 支持算法列表 +
    +
  • 数据增强 +
  • +
  • 优化器
      +
    • Adam
    • +
    • AdamW
    • +
    • Lion
    • +
    • Adan (experimental)
    • +
    • AdaGrad
    • +
    • LAMB
    • +
    • Momentum
    • +
    • RMSProp
    • +
    • SGD
    • +
    • NAdam
    • +
    +
  • +
  • 学习率调度器
      +
    • Warmup Cosine Decay
    • +
    • Step LR
    • +
    • Polynomial Decay
    • +
    • Exponential Decay
    • +
    +
  • +
  • 正则化
      +
    • Weight Decay
    • +
    • Label Smoothing
    • +
    • Stochastic Depth (depends on networks)
    • +
    • Dropout (depends on networks)
    • +
    +
  • +
  • 损失函数
      +
    • Cross Entropy (w/ class weight and auxiliary logit support)
    • +
    • Binary Cross Entropy (w/ class weight and auxiliary logit support)
    • +
    • Soft Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • +
    • Soft Binary Cross Entropy Loss (automatically enabled if mixup or label smoothing is used)
    • +
    +
  • +
  • 模型融合
      +
    • Warmup EMA (Exponential Moving Average)
    • +
    +
  • +
+
+

贡献方式

+

欢迎开发者用户提issue或提交代码PR,或贡献更多的算法和模型,一起让MindCV变得更好。

+

有关贡献指南,请参阅贡献。 +请遵循模型编写指南所规定的规则来贡献模型接口:)

+

许可证

+

本项目遵循Apache License 2.0开源协议。

+

致谢

+

MindCV是由MindSpore团队、西安电子科技大学、西安交通大学联合开发的开源项目。 +衷心感谢所有参与的研究人员和开发人员为这个项目所付出的努力。 +十分感谢 OpenI 平台所提供的算力资源。

+

引用

+

如果你觉得MindCV对你的项目有帮助,请考虑引用:

+
@misc{MindSpore Computer Vision 2022,
+    title={{MindSpore Computer  Vision}:MindSpore Computer Vision Toolbox and Benchmark},
+    author={MindSpore Vision Contributors},
+    howpublished = {\url{https://github.com/mindspore-lab/mindcv/}},
+    year={2022}
+}
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/installation/index.html b/zh/installation/index.html new file mode 100644 index 000000000..082716b26 --- /dev/null +++ b/zh/installation/index.html @@ -0,0 +1,1206 @@ + + + + + + + + + + + + + + + + + + + + + + + + 安装 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

安装

+ +

依赖

+
    +
  • mindspore >= 1.8.1
  • +
  • numpy >= 1.17.0
  • +
  • pyyaml >= 5.3
  • +
  • tqdm
  • +
  • openmpi 4.0.3 (分布式训练所需)
  • +
+

为了安装python相关库依赖,只需运行:

+
pip install -r requirements.txt
+
+
+

Tip

+

MindSpore可以通过遵循官方指引,在不同的硬件平台上获得最优的安装体验。 +为了在分布式模式下运行,您还需要安装OpenMPI

+
+

如下的指引假设您已经完成了所有依赖库的安装。

+

PyPI源安装

+

MindCV被发布为一个Python包并能够通过pip进行安装。我们推荐您在虚拟环境安装使用。 打开终端,输入以下指令来安装MindCV:

+
+
+
+
pip install mindcv
+
+
+
+
# 暂不支持
+
+
+
+
+

上述命令会自动安装依赖:NumPyPyYAMLtqdm的兼容版本。

+
+

Tip

+

如果您之前没有使用 Python 的经验,我们建议您阅读使用Python的pip来管理您的项目的依赖关系, +这是对 Python 包管理机制的一个很好的介绍,并且可以帮助您在遇到错误时进行故障排除。

+
+
+

Warning

+

上述命令 不会 安装MindSpore. +我们强烈推荐您通过官方指引来安装MindSpore

+
+

源码安装 (未经测试版本)

+

from VCS

+
pip install git+https://github.com/mindspore-lab/mindcv.git
+
+

from local src

+
+

Tip

+

由于本项目处于活跃开发阶段,如果您是开发者或者贡献者,请优先选择此安装方式。

+
+

MindCV可以在由 GitHub 克隆仓库到本地文件夹后直接使用。 这对于想使用最新版本的开发者十分方便:

+
git clone https://github.com/mindspore-lab/mindcv.git
+
+

在克隆到本地之后,推荐您使用"可编辑"模式进行安装,这有助于解决潜在的模块导入问题。

+
cd mindcv
+pip install -e .
+
+ + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/modelzoo/index.html b/zh/modelzoo/index.html new file mode 100644 index 000000000..af9467819 --- /dev/null +++ b/zh/modelzoo/index.html @@ -0,0 +1,2484 @@ + + + + + + + + + + + + + + + + + + + + + + + + 模型仓库 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + +

模型仓库

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelContextTop-1 (%)Top-5 (%)Params(M)RecipeDownload
BiT_resnet50D910x8-G76.8193.1725.55yamlweights
BiT_resnet50x3D910x8-G80.6395.12217.31yamlweights
BiT_resnet101D910x8-G77.9393.7544.54yamlweights
coat_lite_tinyD910x8-G77.3593.435.72yamlweights
coat_lite_miniD910x8-G78.5193.8411.01yamlweights
coat_tinyD910x8-G79.6794.885.50yamlweights
coat_miniD910x8-G81.0895.3410.34yamlweights
convit_tinyD910x8-G73.6691.725.71yamlweights
convit_tiny_plusD910x8-G77.0093.609.97yamlweights
convit_smallD910x8-G81.6395.5927.78yamlweights
convit_small_plusD910x8-G81.8095.4248.98yamlweights
convit_baseD910x8-G82.1095.5286.54yamlweights
convit_base_plusD910x8-G81.9695.04153.13yamlweights
convnext_tinyD910x64-G81.9195.7928.59yamlweights
convnext_smallD910x64-G83.4096.3650.22yamlweights
convnext_baseD910x64-G83.3296.2488.59yamlweights
convnextv2_tinyD910x8-G82.4395.9828.64yamlweights
crossvit_9D910x8-G73.5691.798.55yamlweights
crossvit_15D910x8-G81.0895.3327.27yamlweights
crossvit_18D910x8-G81.9395.7543.27yamlweights
densenet121D910x8-G75.6492.848.06yamlweights
densenet161D910x8-G79.0994.6628.90yamlweights
densenet169D910x8-G77.2693.7114.31yamlweights
densenet201D910x8-G78.1494.0820.24yamlweights
dpn92D910x8-G79.4694.4937.79yamlweights
dpn98D910x8-G79.9494.5761.74yamlweights
dpn107D910x8-G80.0594.7487.13yamlweights
dpn131D910x8-G80.0794.7279.48yamlweights
edgenext_xx_smallD910x8-G71.0289.991.33yamlweights
edgenext_x_smallD910x8-G75.1492.502.34yamlweights
edgenext_smallD910x8-G79.1594.395.59yamlweights
edgenext_baseD910x8-G82.2495.9418.51yamlweights
efficientnet_b0D910x64-G76.8993.165.33yamlweights
efficientnet_b1D910x64-G78.9594.347.86yamlweights
ghostnet_050D910x8-G66.0386.642.60yamlweights
ghostnet_100D910x8-G73.7891.665.20yamlweights
ghostnet_130D910x8-G75.5092.567.39yamlweights
googlenetD910x8-G72.6890.896.99yamlweights
hrnet_w32D910x8-G80.6495.4441.30yamlweights
hrnet_w48D910x8-G81.1995.6977.57yamlweights
inception_v3D910x8-G79.1194.4027.20yamlweights
inception_v4D910x8-G80.8895.3442.74yamlweights
mixnet_sD910x8-G75.5292.524.17yamlweights
mixnet_mD910x8-G76.6493.055.06yamlweights
mixnet_lD910x8-G78.7394.317.38yamlweights
mnasnet_050D910x8-G68.0788.092.14yamlweights
mnasnet_075D910x8-G71.8190.533.20yamlweights
mnasnet_100D910x8-G74.2891.704.42yamlweights
mnasnet_130D910x8-G75.6592.646.33yamlweights
mnasnet_140D910x8-G76.0192.837.16yamlweights
mobilenet_v1_025D910x8-G53.8777.660.47yamlweights
mobilenet_v1_050D910x8-G65.9486.511.34yamlweights
mobilenet_v1_075D910x8-G70.4489.492.60yamlweights
mobilenet_v1_100D910x8-G72.9591.014.25yamlweights
mobilenet_v2_075D910x8-G69.9889.322.66yamlweights
mobilenet_v2_100D910x8-G72.2790.723.54yamlweights
mobilenet_v2_140D910x8-G75.5692.566.15yamlweights
mobilenet_v3_small_100D910x8-G68.1087.862.55yamlweights
mobilenet_v3_large_100D910x8-G75.2392.315.51yamlweights
mobilevit_xx_smallD910x8-G68.9188.911.27yamlweights
mobilevit_x_smallD910x8-G74.9992.322.32yamlweights
mobilevit_smallD910x8-G78.4794.185.59yamlweights
nasnet_a_4x1056D910x8-G73.6591.255.33yamlweights
pit_tiD910x8-G72.9691.334.85yamlweights
pit_xsD910x8-G78.4194.0610.61yamlweights
pit_sD910x8-G80.5694.8023.46yamlweights
pit_bD910x8-G81.8795.0473.76yamlweights
poolformer_s12D910x8-G77.3393.3411.92yamlweights
pvt_tinyD910x8-G74.8192.1813.23yamlweights
pvt_smallD910x8-G79.6694.7124.49yamlweights
pvt_mediumD910x8-G81.8295.8144.21yamlweights
pvt_largeD910x8-G81.7595.7061.36yamlweights
pvt_v2_b0D910x8-G71.5090.603.67yamlweights
pvt_v2_b1D910x8-G78.9194.4914.01yamlweights
pvt_v2_b2D910x8-G81.9995.7425.35yamlweights
pvt_v2_b3D910x8-G82.8496.2445.24yamlweights
pvt_v2_b4D910x8-G83.1496.2762.56yamlweights
regnet_x_200mfD910x8-G68.7488.382.68yamlweights
regnet_x_400mfD910x8-G73.1691.355.16yamlweights
regnet_x_600mfD910x8-G74.3492.006.20yamlweights
regnet_x_800mfD910x8-G76.0492.977.26yamlweights
regnet_y_200mfD910x8-G70.3089.613.16yamlweights
regnet_y_400mfD910x8-G73.9191.844.34yamlweights
regnet_y_600mfD910x8-G75.6992.506.06yamlweights
regnet_y_800mfD910x8-G76.5293.106.26yamlweights
regnet_y_16gfD910x8-G82.9296.2983.71yamlweights
repmlp_t224D910x8-G76.7193.3038.30yamlweights
repvgg_a0D910x8-G72.1990.759.13yamlweights
repvgg_a1D910x8-G74.1991.8914.12yamlweights
repvgg_a2D910x8-G76.6393.4228.25yamlweights
repvgg_b0D910x8-G74.9992.4015.85yamlweights
repvgg_b1D910x8-G78.8194.3757.48yamlweights
repvgg_b2D910x64-G79.2994.6689.11yamlweights
repvgg_b3D910x64-G80.4695.34123.19yamlweights
repvgg_b1g2D910x8-G78.0394.0945.85yamlweights
repvgg_b1g4D910x8-G77.6494.0340.03yamlweights
repvgg_b2g4D910x8-G78.894.3661.84yamlweights
res2net50D910x8-G79.3594.6425.76yamlweights
res2net101D910x8-G79.5694.7045.33yamlweights
res2net50_v1bD910x8-G80.3295.0925.77yamlweights
res2net101_v1bD910x8-G81.1495.4145.35yamlweights
resnest50D910x8-G80.8195.1627.55yamlweights
resnest101D910x8-G82.9096.1248.41yamlweights
resnet18D910x8-G70.2189.6211.70yamlweights
resnet34D910x8-G74.1591.9821.81yamlweights
resnet50D910x8-G76.6993.5025.61yamlweights
resnet101D910x8-G78.2494.0944.65yamlweights
resnet152D910x8-G78.7294.4560.34yamlweights
resnetv2_50D910x8-G76.9093.3725.60yamlweights
resnetv2_101D910x8-G78.4894.2344.55yamlweights
resnext50_32x4dD910x8-G78.5394.1025.10yamlweights
resnext101_32x4dD910x8-G79.8394.8044.32yamlweights
resnext101_64x4dD910x8-G80.3094.8283.66yamlweights
resnext152_64x4dD910x8-G80.5295.00115.27yamlweights
rexnet_09D910x8-G77.0693.414.13yamlweights
rexnet_10D910x8-G77.3893.604.84yamlweights
rexnet_13D910x8-G79.0694.287.61yamlweights
rexnet_15D910x8-G79.9594.749.79yamlweights
rexnet_20D910x8-G80.6494.9916.45yamlweights
seresnet18D910x8-G71.8190.4911.80yamlweights
seresnet34D910x8-G75.3892.5021.98yamlweights
seresnet50D910x8-G78.3294.0728.14yamlweights
seresnext26_32x4dD910x8-G77.1793.4216.83yamlweights
seresnext50_32x4dD910x8-G78.7194.3627.63yamlweights
shufflenet_v1_g3_05D910x8-G57.0579.730.73yamlweights
shufflenet_v1_g3_10D910x8-G67.7787.731.89yamlweights
shufflenet_v2_x0_5D910x8-G60.5382.111.37yamlweights
shufflenet_v2_x1_0D910x8-G69.4788.882.29yamlweights
shufflenet_v2_x1_5D910x8-G72.7990.933.53yamlweights
shufflenet_v2_x2_0D910x8-G75.0792.087.44yamlweights
skresnet18D910x8-G73.0991.2011.97yamlweights
skresnet34D910x8-G76.7193.1022.31yamlweights
skresnext50_32x4dD910x8-G79.0894.6037.31yamlweights
squeezenet1_0D910x8-G59.0181.011.25yamlweights
squeezenet1_0GPUx8-G58.8381.081.25yamlweights
squeezenet1_1D910x8-G58.4480.841.24yamlweights
squeezenet1_1GPUx8-G59.1881.411.24yamlweights
swin_tinyD910x8-G80.8294.8033.38yamlweights
swinv2_tiny_window8D910x8-G81.4295.4328.78yamlweights
vgg11D910x8-G71.8690.50132.86yamlweights
vgg13D910x8-G72.8791.02133.04yamlweights
vgg16D910x8-G74.6191.87138.35yamlweights
vgg19D910x8-G75.2192.56143.66yamlweights
visformer_tinyD910x8-G78.2894.1510.33yamlweights
visformer_tiny_v2D910x8-G78.8294.419.38yamlweights
visformer_smallD910x8-G81.7695.8840.25yamlweights
visformer_small_v2D910x8-G82.1795.9023.52yamlweights
vit_b_32_224D910x8-G75.8692.0887.46yamlweights
vit_l_16_224D910x8-G76.3492.79303.31yamlweights
vit_l_32_224D910x8-G73.7190.92305.52yamlweights
volo_d1D910x8-G82.5995.9927yamlweights
xceptionD910x8-G79.0194.2522.91yamlweights
xcit_tiny_12_p16_224D910x8-G77.6793.797.00yamlweights
+

Notes

+
    +
  • Context: Training context denoted as {device}x{pieces}-{MS mode}, where mindspore mode can be G - graph mode or F - pynative mode with ms function. For example, D910x8-G is for training on 8 pieces of Ascend 910 NPU using graph mode.
  • +
  • Top-1 and Top-5: Accuracy reported on the validation set of ImageNet-1K.
  • +
+ + + + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/notes/changelog/index.html b/zh/notes/changelog/index.html new file mode 100644 index 000000000..a1497941f --- /dev/null +++ b/zh/notes/changelog/index.html @@ -0,0 +1,1034 @@ + + + + + + + + + + + + + + + + + + + + + + + + 更新日志 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

更新日志

+

即将到来

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/notes/code_of_conduct/index.html b/zh/notes/code_of_conduct/index.html new file mode 100644 index 000000000..568d7f713 --- /dev/null +++ b/zh/notes/code_of_conduct/index.html @@ -0,0 +1,1034 @@ + + + + + + + + + + + + + + + + + + + + + + + + 行为准则 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

行为准则

+

即将到来

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/notes/contributing/index.html b/zh/notes/contributing/index.html new file mode 100644 index 000000000..33975aae4 --- /dev/null +++ b/zh/notes/contributing/index.html @@ -0,0 +1,1337 @@ + + + + + + + + + + + + + + + + + + + + + + + + Contributing - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + +

MindCV Contributing Guidelines

+

Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given.

+

Contributor License Agreement

+

It's required to sign CLA before your first code submission to MindCV community.

+

For individual contributor, please refer to ICLA online document for the detailed information.

+

Types of Contributions

+

Report Bugs

+

Report bugs at https://github.com/mindspore-lab/mindcv/issues.

+

If you are reporting a bug, please include:

+
    +
  • Your operating system name and version.
  • +
  • Any details about your local setup that might be helpful in troubleshooting.
  • +
  • Detailed steps to reproduce the bug.
  • +
+

Fix Bugs

+

Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it.

+

Implement Features

+

Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it.

+

Write Documentation

+

MindCV could always use more documentation, whether as part of the +official MindCV docs, in docstrings, or even on the web in blog posts, +articles, and such.

+

Submit Feedback

+

The best way to send feedback is to file an issue at https://github.com/mindspore-lab/mindcv/issues.

+

If you are proposing a feature:

+
    +
  • Explain in detail how it would work.
  • +
  • Keep the scope as narrow as possible, to make it easier to implement.
  • +
  • Remember that this is a volunteer-driven project, and that contributions are welcome :)
  • +
+

Getting Started

+

Ready to contribute? Here's how to set up mindcv for local development.

+
    +
  1. Fork the mindcv repo on GitHub.
  2. +
  3. Clone your fork locally:
  4. +
+
git clone git@github.com:your_name_here/mindcv.git
+
+

After that, you should add official repository as the upstream repository:

+
git remote add upstream git@github.com:mindspore-lab/mindcv
+
+
    +
  1. Install your local copy into a conda environment. Assuming you have conda installed, this is how you set up your fork for local development:
  2. +
+
conda create -n mindcv python=3.8
+conda activate mindcv
+cd mindcv
+pip install -e .
+
+
    +
  1. Create a branch for local development:
  2. +
+
git checkout -b name-of-your-bugfix-or-feature
+
+

Now you can make your changes locally.

+
    +
  1. When you're done making changes, check that your changes pass the linters and the tests:
  2. +
+
pre-commit run --show-diff-on-failure --color=always --all-files
+pytest
+
+

If all static linting checks pass, you will get output like:

+

pre-commit-succeed

+

Otherwise, you need to fix the warnings according to the output:

+

pre-commit-failed

+

To get pre-commit and pytest, just pip install them into your conda environment.

+
    +
  1. Commit your changes and push your branch to GitHub:
  2. +
+
git add .
+git commit -m "Your detailed description of your changes."
+git push origin name-of-your-bugfix-or-feature
+
+
    +
  1. Submit a pull request through the GitHub website.
  2. +
+

Pull Request Guidelines

+

Before you submit a pull request, check that it meets these guidelines:

+
    +
  1. The pull request should include tests.
  2. +
  3. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.md.
  4. +
  5. The pull request should work for Python 3.7, 3.8 and 3.9, and for PyPy. Check + https://github.com/mindspore-lab/mindcv/actions + and make sure that the tests pass for all supported Python versions.
  6. +
+

Tips

+

You can install the git hook scripts instead of running pre-commit run -a manually for linting.

+

Run the following command to set up the git hook scripts:

+
pre-commit install
+
+

Now pre-commit will run automatically on every git commit!

+

Releasing

+

A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.md). +Then run:

+
bump2version patch # possible: major / minor / patch
+git push
+git push --tags
+
+

GitHub Action will then deploy to PyPI if tests pass.

+ + + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/notes/faq/index.html b/zh/notes/faq/index.html new file mode 100644 index 000000000..15d3c4cec --- /dev/null +++ b/zh/notes/faq/index.html @@ -0,0 +1,1017 @@ + + + + + + + + + + + + + + + + + + + + + + 常见问题 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

常见问题

+

即将到来

+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/data/index.html b/zh/reference/data/index.html new file mode 100644 index 000000000..973cba97b --- /dev/null +++ b/zh/reference/data/index.html @@ -0,0 +1,4180 @@ + + + + + + + + + + + + + + + + + + + + + + + + data - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Data

+

Auto Augmentation

+ + + +
+ + + +

+mindcv.data.auto_augment.auto_augment_transform(configs, hparams) + +

+ + +
+ +

Create a AutoAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the automatic augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). The first part defines +the AutoAugment policy ('autoaug', 'autoaugr' or '3a': +'autoaug' for the original AutoAugment policy with PosterizeOriginal, +'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation, + '3a' for the AutoAugment only with 3 augmentations.) +There is no order requirement for the remaining config parts.

+
    +
  • mstd: Float standard deviation of applied magnitude noise.
  • +
+

Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy +and magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams of the automatic augmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
def auto_augment_transform(configs, hparams):
+    """
+    Create a AutoAugment transform
+    Args:
+        configs: A string that defines the automatic augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-"). The first part defines
+            the AutoAugment policy ('autoaug', 'autoaugr' or '3a':
+            'autoaug' for the original AutoAugment policy with PosterizeOriginal,
+            'autoaugr' for the AutoAugment policy with PosterizeIncreasing operation,
+             '3a' for the AutoAugment only with 3 augmentations.)
+            There is no order requirement for the remaining config parts.
+
+            - mstd: Float standard deviation of applied magnitude noise.
+
+            Example: 'autoaug-mstd0.5' will be automatically augment using the autoaug strategy
+            and magnitude_std 0.5.
+        hparams: Other hparams of the automatic augmentation scheme.
+    """
+    config = configs.split("-")
+    policy_name = config[0]
+    config = config[1:]
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param injected via hparams for now
+            hparams.setdefault("magnitude_std", float(val))
+        else:
+            assert False, "Unknown AutoAugment config section"
+    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
+    return AutoAugment(aa_policy)
+
+
+
+ +
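A minimal usage sketch based on the config-string format documented above (passing an empty hparams dict simply falls back to the defaults set inside the function):

from mindcv.data.auto_augment import auto_augment_transform
+
+# AutoAugment policy 'autoaug' with magnitude noise std 0.5
+aa_transform = auto_augment_transform("autoaug-mstd0.5", hparams={})
+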
+ + +
+ + + +

+mindcv.data.auto_augment.rand_augment_transform(configs, hparams) + +

+ + +
+ +

Create a RandAugment transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the random augmentation configuration. +It is composed of multiple parts separated by dashes ("-"). +The first part defines the AutoAugment policy ('randaug' policy). +There is no order requirement for the remaining config parts.

+
    +
  • m: Integer magnitude of rand augment. Default: 10
  • +
  • n: Integer num layer (number of transform operations selected for each image). Default: 2
  • +
  • w: Integer probability weight index (the index that affects a group of weights selected by operations).
  • +
  • mstd: Floating standard deviation of applied magnitude noise, + or uniform sampling at infinity (or greater than 100).
  • +
  • mmax: Set the upper range limit for magnitude to a value + other than the default value of _LEVEL_DENOM (10).
  • +
  • inc: Integer (bool), using the severity increase with magnitude (default: 0).
  • +
+

Example: 'randaug-w0-n3-mstd0.5' will be random augment + using the weights 0, num_layers 3, magnitude_std 0.5.

+
+

+

+
hparams +
+

Other hparams (kwargs) for the RandAugmentation scheme.

+
+

+

+
+ +
+ Source code in mindcv/data/auto_augment.py +
538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
def rand_augment_transform(configs, hparams):
+    """
+    Create a RandAugment transform
+    Args:
+        configs: A string that defines the random augmentation configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment policy ('randaug' policy).
+            There is no order requirement for the remaining config parts.
+
+            - m: Integer magnitude of rand augment. Default: 10
+            - n: Integer num layer (number of transform operations selected for each image). Default: 2
+            - w: Integer probability weight index (the index that affects a group of weights selected by operations).
+            - mstd: Floating standard deviation of applied magnitude noise,
+                or uniform sampling at infinity (or greater than 100).
+            - mmax: Set the upper range limit for magnitude to a value
+                other than the default value of _LEVEL_DENOM (10).
+            - inc: Integer (bool), using the severity increase with magnitude (default: 0).
+
+            Example: 'randaug-w0-n3-mstd0.5' will be random augment
+                using the weights 0, num_layers 3, magnitude_std 0.5.
+        hparams: Other hparams (kwargs) for the RandAugmentation scheme.
+    """
+    magnitude = _LEVEL_DENOM  # default to _LEVEL_DENOM for magnitude (currently 10)
+    num_layers = 2  # default to 2 ops per image
+    hparams.setdefault("magnitude_std", 0.5)  # default magnitude_std is set to 0.5
+    weight_idx = None  # default to no probability weights for op choice
+    transforms = _RAND_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "randaug"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "mstd":
+            # noise param / randomization of magnitude values
+            mstd = float(val)
+            if mstd > 100:
+                # use uniform sampling in 0 to magnitude if mstd is > 100
+                mstd = float("inf")
+            hparams.setdefault("magnitude_std", mstd)
+        elif key == "mmax":
+            # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM]
+            hparams.setdefault("magnitude_max", int(val))
+        elif key == "inc":
+            if bool(val):
+                transforms = _RAND_INCREASING_TRANSFORMS
+        elif key == "m":
+            magnitude = int(val)
+        elif key == "n":
+            num_layers = int(val)
+        elif key == "w":
+            weight_idx = int(val)
+        else:
+            assert False, "Unknown RandAugment config section"
+    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
+    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
+
+
+
+ +
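A minimal usage sketch of the config string described above (values are illustrative; whether an empty hparams dict is sufficient depends on the defaults of the underlying augmentation ops):

from mindcv.data.auto_augment import rand_augment_transform

# 'randaug-m9-n2-mstd0.5': magnitude 9, 2 ops per image, magnitude noise std 0.5
hparams = {}  # extra kwargs forwarded to the augmentation ops; left empty here
ra_transform = rand_augment_transform("randaug-m9-n2-mstd0.5", hparams)
# ra_transform can then be inserted into the image transform pipeline passed to the data loader.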
+ + +
+ + + +

+mindcv.data.auto_augment.trivial_augment_wide_transform(configs, hparams) + +

+ + +
+ +

Create a TrivialAugmentWide transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

A string that defines the TrivialAugmentWide configuration. It is composed of multiple parts separated by dashes ("-"). The first part defines the AutoAugment name; it should be 'trivialaugwide'. The optional second part sets the maximum value of magnitude.

+
    +
  • m: the final magnitude of an operation will be uniformly sampled from [0, m]. Default: 31
  • +
+

Example: 'trivialaugwide-m20' will create a TrivialAugmentWide transform with magnitude uniformly sampled from [0, 20].

+
+

+

+
hparams +
+

Other hparams (kwargs) for the TrivialAugment scheme.

+
+

+

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
def trivial_augment_wide_transform(configs, hparams):
+    """
+    Create a TrivialAugmentWide transform
+    Args:
+        configs: A string that defines the TrivialAugmentWide configuration.
+            It is composed of multiple parts separated by dashes ("-").
+            The first part defines the AutoAugment name; it should be 'trivialaugwide'.
+            The optional second part sets the maximum value of magnitude.
+
+            - m: the final magnitude of an operation will be uniformly sampled from [0, m]. Default: 31
+
+            Example: 'trivialaugwide-m20' will create a TrivialAugmentWide transform
+            with magnitude uniformly sampled from [0, 20].
+        hparams: Other hparams (kwargs) for the TrivialAugment scheme.
+    Returns:
+        A Mindspore compatible Transform
+    """
+    magnitude = 31
+    transforms = _TRIVIALAUGMENT_WIDE_TRANSFORMS
+    config = configs.split("-")
+    assert config[0] == "trivialaugwide"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        else:
+            assert False, "Unknown TrivialAugmentWide config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_max"] = magnitude
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling
+    hparams["trivialaugwide"] = True
+    ta_ops = trivial_augment_wide_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
+    return TrivialAugmentWide(ta_ops)
+
+
+
+ +
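A minimal usage sketch (the config string is illustrative; hparams may be None, in which case an empty dict is used internally):

from mindcv.data.auto_augment import trivial_augment_wide_transform

# 'trivialaugwide-m20': magnitude uniformly sampled from [0, 20]
taw_transform = trivial_augment_wide_transform("trivialaugwide-m20", hparams=None)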
+ + +
+ + + +

+mindcv.data.auto_augment.augment_and_mix_transform(configs, hparams=None) + +

+ + +
+ +

Create an AugMix transform

+ + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
configs +
+

String defining the configuration of AugMix augmentation. Consists of multiple sections separated by dashes ('-'). The first section defines the specific name of the augmentation; it should be 'augmix'. The remaining sections, in no particular order, determine: 'm' - integer magnitude (severity) of the augmentation mix (default: 3); 'w' - integer width of the augmentation chain (default: 3); 'd' - integer depth of the augmentation chain (-1 means random in [1, 3], default: -1); 'a' - integer or float, the alpha parameter of the Beta distribution used to generate the mixing weights (default: 1.0). Example: 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, and chain depth 2.

+
+

+ + TYPE: + str + +

+
hparams +
+

Other hparams (kwargs) for the Augmentation transforms

+
+

+ + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A Mindspore compatible Transform

+
+
+ +
+ Source code in mindcv/data/auto_augment.py +
710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
def augment_and_mix_transform(configs, hparams=None):
+    """Create AugMix PyTorch transform
+
+    Args:
+        configs (str): String defining configuration of AugMix augmentation. Consists of multiple sections separated
+            by dashes ('-'). The first section defines the specific name of the augmentation; it should be 'augmix'.
+            The remaining sections, in no particular order, determine:
+                'm' - integer magnitude (severity) of the augmentation mix (default: 3)
+                'w' - integer width of the augmentation chain (default: 3)
+                'd' - integer depth of the augmentation chain (-1 means random in [1, 3], default: -1)
+                'a' - integer or float, the alpha parameter of the Beta distribution used to generate the mixing weights (default: 1.0)
+            Example: 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, and chain depth 2
+
+        hparams: Other hparams (kwargs) for the Augmentation transforms
+
+    Returns:
+         A Mindspore compatible Transform
+    """
+    magnitude = 3
+    width = 3
+    depth = -1
+    alpha = 1.0
+    config = configs.split("-")
+    assert config[0] == "augmix"
+    config = config[1:]
+    for c in config:
+        cs = re.split(r"(\d.*)", c)
+        if len(cs) < 2:
+            continue
+        key, val = cs[:2]
+        if key == "m":
+            magnitude = int(val)
+        elif key == "w":
+            width = int(val)
+        elif key == "d":
+            depth = int(val)
+        elif key == "a":
+            alpha = float(val)
+        else:
+            assert False, "Unknown AugMix config section"
+    if not hparams:
+        hparams = dict()
+    hparams["magnitude_std"] = float("inf")  # default to uniform sampling (if not set via mstd arg)
+    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
+    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth)
+
+
+
+ +
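A minimal usage sketch mirroring the example in the docstring (values are illustrative):

from mindcv.data.auto_augment import augment_and_mix_transform

# 'augmix-m5-w4-d2': severity 5, chain width 4, chain depth 2
augmix_transform = augment_and_mix_transform("augmix-m5-w4-d2", hparams=None)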

Dataset Factory

+ + + +
+ + + +

+mindcv.data.dataset_factory.create_dataset(name='', root=None, split='train', shuffle=True, num_samples=None, num_shards=None, shard_id=None, num_parallel_workers=None, download=False, num_aug_repeats=0, **kwargs) + +

+ + +
+ +

Creates dataset by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

dataset name like MNIST, CIFAR10, ImageNet, ''. '' means a customized dataset. Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
root +
+

dataset root dir. Default: None.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
split +
+

data split: '' or split name string (train/val/test), if it is '', no split is used. +Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'train' + +

+
shuffle +
+

whether to shuffle the dataset. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
num_samples +
+

Number of elements to sample (default=None, which means sample all elements).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_shards +
+

Number of shards that the dataset will be divided into (default=None). +When this argument is specified, num_samples reflects the maximum sample number of per shard.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
shard_id +
+

The shard ID within num_shards (default=None). +This argument can only be specified when num_shards is also specified.

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers to read the data (default=None, set in the config).

+
+

+ + TYPE: + Optional[int] + + + DEFAULT: + None + +

+
download +
+

whether to download the dataset. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_aug_repeats +
+

Number of dataset repetition for repeated augmentation. +If 0 or 1, repeated augmentation is disabled. +Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
+ +
+ Note +

For custom datasets and imagenet, the dataset dir should follow the structure like: +.dataset_name/ +├── split1/ +│ ├── class1/ +│ │ ├── 000001.jpg +│ │ ├── 000002.jpg +│ │ └── .... +│ └── class2/ +│ ├── 000001.jpg +│ ├── 000002.jpg +│ └── .... +└── split2/ + ├── class1/ + │ ├── 000001.jpg + │ ├── 000002.jpg + │ └── .... + └── class2/ + ├── 000001.jpg + ├── 000002.jpg + └── ....

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Dataset object

+
+
+ +
+ Source code in mindcv/data/dataset_factory.py +
 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
def create_dataset(
+    name: str = "",
+    root: Optional[str] = None,
+    split: str = "train",
+    shuffle: bool = True,
+    num_samples: Optional[int] = None,
+    num_shards: Optional[int] = None,
+    shard_id: Optional[int] = None,
+    num_parallel_workers: Optional[int] = None,
+    download: bool = False,
+    num_aug_repeats: int = 0,
+    **kwargs,
+):
+    r"""Creates dataset by name.
+
+    Args:
+        name: dataset name like MNIST, CIFAR10, ImageNet, ''. '' means a customized dataset. Default: ''.
+        root: dataset root dir. Default: None.
+        split: data split: '' or split name string (train/val/test), if it is '', no split is used.
+            Otherwise, it is a subfolder of root dir, e.g., train, val, test. Default: 'train'.
+        shuffle: whether to shuffle the dataset. Default: True.
+        num_samples: Number of elements to sample (default=None, which means sample all elements).
+        num_shards: Number of shards that the dataset will be divided into (default=None).
+            When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+        shard_id: The shard ID within `num_shards` (default=None).
+            This argument can only be specified when `num_shards` is also specified.
+        num_parallel_workers: Number of workers to read the data (default=None, set in the config).
+        download: whether to download the dataset. Default: False
+        num_aug_repeats: Number of dataset repetition for repeated augmentation.
+            If 0 or 1, repeated augmentation is disabled.
+            Otherwise, repeated augmentation is enabled and the common choice is 3. (Default: 0)
+
+    Note:
+        For custom datasets and imagenet, the dataset dir should follow the structure like:
+        .dataset_name/
+        ├── split1/
+        │  ├── class1/
+        │  │   ├── 000001.jpg
+        │  │   ├── 000002.jpg
+        │  │   └── ....
+        │  └── class2/
+        │      ├── 000001.jpg
+        │      ├── 000002.jpg
+        │      └── ....
+        └── split2/
+           ├── class1/
+           │   ├── 000001.jpg
+           │   ├── 000002.jpg
+           │   └── ....
+           └── class2/
+               ├── 000001.jpg
+               ├── 000002.jpg
+               └── ....
+
+    Returns:
+        Dataset object
+    """
+    name = name.lower()
+    if root is None:
+        root = os.path.join(get_dataset_download_root(), name)
+
+    assert (num_samples is None) or (num_aug_repeats == 0), "num_samples and num_aug_repeats can NOT be set together."
+
+    # subset sampling
+    if num_samples is not None and num_samples > 0:
+        # TODO: rewrite ordered distributed sampler (subset sampling in distributed mode is not tested)
+        if num_shards is not None and num_shards > 1:  # distributed
+            _logger.info(f"number of shards: {num_shards}, number of samples: {num_samples}")
+            sampler = DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
+        else:  # standalone
+            if shuffle:
+                sampler = ds.RandomSampler(replacement=False, num_samples=num_samples)
+            else:
+                sampler = ds.SequentialSampler(num_samples=num_samples)
+        mindspore_kwargs = dict(
+            shuffle=None,
+            sampler=sampler,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+    else:
+        sampler = None
+        mindspore_kwargs = dict(
+            shuffle=shuffle,
+            sampler=sampler,
+            num_shards=num_shards,
+            shard_id=shard_id,
+            num_parallel_workers=num_parallel_workers,
+            **kwargs,
+        )
+
+    # sampler for repeated augmentation
+    if num_aug_repeats > 0:
+        dataset_size = get_dataset_size(name, root, split)
+        _logger.info(
+            f"Repeated augmentation is enabled, num_aug_repeats: {num_aug_repeats}, "
+            f"original dataset size: {dataset_size}."
+        )
+        # since drop_remainder is usually True, we don't need to do rounding in sampling
+        sampler = RepeatAugSampler(
+            dataset_size,
+            num_shards=num_shards,
+            rank_id=shard_id,
+            num_repeats=num_aug_repeats,
+            selected_round=0,
+            shuffle=shuffle,
+        )
+        mindspore_kwargs = dict(shuffle=None, sampler=sampler, num_shards=None, shard_id=None, **kwargs)
+
+    # create dataset
+    if name in _MINDSPORE_BASIC_DATASET:
+        dataset_class = _MINDSPORE_BASIC_DATASET[name][0]
+        dataset_download = _MINDSPORE_BASIC_DATASET[name][1]
+        dataset_new_path = None
+        if download:
+            if shard_id is not None:
+                root = os.path.join(root, f"dataset_{str(shard_id)}")
+            dataset_download = dataset_download(root)
+            dataset_download.download()
+            dataset_new_path = dataset_download.path
+
+        dataset = dataset_class(
+            dataset_dir=dataset_new_path if dataset_new_path else root,
+            usage=split,
+            **mindspore_kwargs,
+        )
+        # address ms dataset num_classes empty issue
+        if name == "mnist":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar10":
+            dataset.num_classes = lambda: 10
+        elif name == "cifar100":
+            dataset.num_classes = lambda: 100
+
+    else:
+        if name == "imagenet" and download:
+            raise ValueError(
+                "Imagenet dataset download is not supported. "
+                "Please download imagenet from https://www.image-net.org/download.php, "
+                "and parse the path of dateset directory via args.data_dir."
+            )
+
+        if os.path.isdir(root):
+            root = os.path.join(root, split)
+        dataset = ImageFolderDataset(dataset_dir=root, **mindspore_kwargs)
+        """ Another implementation which a bit slower than ImageFolderDataset
+            imagenet_dataset = ImageNetDataset(dataset_dir=root)
+            sampler = RepeatAugSampler(len(imagenet_dataset), num_shards=num_shards, rank_id=shard_id,
+                                       num_repeats=repeated_aug, selected_round=1, shuffle=shuffle)
+            dataset = ds.GeneratorDataset(imagenet_dataset, column_names=imagenet_dataset.column_names, sampler=sampler)
+        """
+    return dataset
+
+
+
+ +
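A minimal usage sketch for a standalone (non-distributed) run; the local path is hypothetical and download=True only works for the supported basic datasets:

from mindcv.data.dataset_factory import create_dataset

dataset = create_dataset(
    name="cifar10",
    root="./datasets/cifar10",  # hypothetical local path
    split="train",
    shuffle=True,
    download=True,
)
print(dataset.get_dataset_size())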

Sampler

+ + +
+ + + +

+ mindcv.data.distributed_sampler.RepeatAugSampler + + +

+ + +
+ + +

Sampler that restricts data loading to a subset of the dataset for distributed training, with repeated augmentation. It ensures that each augmented version of a sample will be visible to a different process.

+

This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_size +
+

dataset size.

+
+

+

+
num_shards +
+

num devices.

+
+

+ + DEFAULT: + None + +

+
rank_id +
+

device id.

+
+

+ + DEFAULT: + None + +

+
shuffle(bool) +
+

True for using shuffle, False for not using.

+
+

+

+
num_repeats(int) +
+

num of repeated instances in repeated augmentation, Default:3.

+
+

+

+
selected_round(int) +
+

round the total number of samples by this factor. Default: 256.

+
+

+

+
+ +
+ Source code in mindcv/data/distributed_sampler.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
class RepeatAugSampler:
+    """Sampler that restricts data loading to a subset of the dataset for distributed,
+    with repeated augmentation.
+    It ensures that different each augmented version of a sample will be visible to a
+    different process.
+
+    This sampler was adapted from https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py
+
+    Args:
+        dataset_size: dataset size.
+        num_shards: num devices.
+        rank_id: device id.
+        shuffle(bool): True for using shuffle, False for not using.
+        num_repeats(int): num of repeated instances in repeated augmentation, Default:3.
+        selected_round(int): round the total number of samples by this factor. Default: 256.
+    """
+
+    def __init__(
+        self,
+        dataset_size,
+        num_shards=None,
+        rank_id=None,
+        shuffle=True,
+        num_repeats=3,
+        selected_round=256,
+    ):
+        if num_shards is None:
+            _logger.warning("num_shards is set to 1 in RepeatAugSampler since it is not passed in")
+            num_shards = 1
+        if rank_id is None:
+            rank_id = 0
+
+        # assert isinstance(num_repeats, int), f'num_repeats should be Type integer, but got {type(num_repeats)}'
+
+        self.dataset_size = dataset_size
+        self.num_shards = num_shards
+        self.rank_id = rank_id
+        self.shuffle = shuffle
+        self.num_repeats = int(num_repeats)
+        self.epoch = 0
+        self.num_samples = int(math.ceil(self.dataset_size * num_repeats / self.num_shards))
+        self.total_size = self.num_samples * self.num_shards
+        # Determine the number of samples to select per epoch for each rank.
+        if selected_round:
+            self.num_selected_samples = int(
+                math.floor(self.dataset_size // selected_round * selected_round / num_shards)
+            )
+        else:
+            self.num_selected_samples = int(math.ceil(self.dataset_size / num_shards))
+
+    def __iter__(self):
+        # deterministically shuffle based on epoch
+        # print('__iter__  generating new shuffled indices: ', self.epoch)
+        if self.shuffle:
+            indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size)
+            indices = indices.tolist()
+            self.epoch += 1
+            # print(indices[:30])
+        else:
+            indices = list(range(self.dataset_size))
+        # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]
+        indices = [ele for ele in indices for i in range(self.num_repeats)]
+
+        # add extra samples to make it evenly divisible
+        padding_size = self.total_size - len(indices)
+        if padding_size > 0:
+            indices += indices[:padding_size]
+        assert len(indices) == self.total_size
+
+        # subsample per rank
+        indices = indices[self.rank_id : self.total_size : self.num_shards]
+        assert len(indices) == self.num_samples
+
+        # return up to num selected samples
+        return iter(indices[: self.num_selected_samples])
+
+    def __len__(self):
+        return self.num_selected_samples
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
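A minimal sketch of how the sampler behaves, assuming an 8-device setting (the numbers are illustrative):

from mindcv.data.distributed_sampler import RepeatAugSampler

# dataset of 1024 samples, 8 shards, rank 0, each sample repeated 3 times
sampler = RepeatAugSampler(dataset_size=1024, num_shards=8, rank_id=0, num_repeats=3)
indices = list(iter(sampler))
print(len(indices) == len(sampler))  # True; only num_selected_samples indices are yielded per epoch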

DataLoader

+ + + +
+ + + +

+mindcv.data.loader.create_loader(dataset, batch_size, drop_remainder=False, is_training=False, mixup=0.0, cutmix=0.0, cutmix_prob=0.0, num_classes=1000, transform=None, target_transform=None, num_parallel_workers=None, python_multiprocessing=False, separate=False) + +

+ + +
+ +

Creates dataloader.

+

Applies operations such as transform and batch to the ms.dataset.Dataset object +created by the create_dataset function to get the dataloader.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset +
+

dataset object created by create_dataset.

+
+

+ + TYPE: + ms.dataset.Dataset + +

+
batch_size +
+

The number of rows each batch is created with. An +int or callable object which takes exactly 1 parameter, BatchInfo.

+
+

+ + TYPE: + int or function + +

+
drop_remainder +
+

Determines whether to drop the last block +whose data row number is less than batch size (default=False). If True, and if there are less +than batch_size rows available to make the last batch, then those rows will +be dropped and not propagated to the child node.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
is_training +
+

whether it is in train mode. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
mixup +
+

mixup alpha, mixup will be enabled if > 0. (default=0.0).

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix +
+

cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_prob +
+

prob of doing cutmix for an image (default=0.0)

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
num_classes +
+

the number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
transform +
+

the list of transformations that will be applied on the image, which is obtained by create_transforms. If None, the default ImageNet transformation for evaluation will be applied. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
target_transform +
+

the list of transformations that will be applied on the label. +If None, the label will be converted to the type of ms.int32. Default: None.

+
+

+ + TYPE: + list or None + + + DEFAULT: + None + +

+
num_parallel_workers +
+

Number of workers(threads) to process the dataset in parallel +(default=None).

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
python_multiprocessing +
+

Parallelize Python operations with multiple worker processes. This +option could be beneficial if the Python operation is computational heavy (default=False).

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate(bool, +
+

whether to return the original image and the transformed image separately

+
+

+ + TYPE: + optional + +

+
+ +
+ Note +
    +
  1. cutmix is currently experimental (which means the performance gain is not guaranteed) and cannot be used together with mixup due to the label int type conflict.
  2. is_training, mixup, and num_classes are used for MixUp, which is a kind of transform operation. However, we are not able to merge it into transform due to the limitations of the mindspore.dataset API.
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

BatchDataset, dataset batched.

+
+
+ +
+ Source code in mindcv/data/loader.py +
 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
def create_loader(
+    dataset,
+    batch_size,
+    drop_remainder=False,
+    is_training=False,
+    mixup=0.0,
+    cutmix=0.0,
+    cutmix_prob=0.0,
+    num_classes=1000,
+    transform=None,
+    target_transform=None,
+    num_parallel_workers=None,
+    python_multiprocessing=False,
+    separate=False,
+):
+    r"""Creates dataloader.
+
+    Applies operations such as transform and batch to the `ms.dataset.Dataset` object
+    created by the `create_dataset` function to get the dataloader.
+
+    Args:
+        dataset (ms.dataset.Dataset): dataset object created by `create_dataset`.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable object which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether to drop the last block
+            whose data row number is less than batch size (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        is_training (bool): whether it is in train mode. Default: False.
+        mixup (float): mixup alpha, mixup will be enabled if > 0. (default=0.0).
+        cutmix (float): cutmix alpha, cutmix will be enabled if > 0. (default=0.0). This operation is experimental.
+        cutmix_prob (float): prob of doing cutmix for an image (default=0.0)
+        num_classes (int): the number of classes. Default: 1000.
+        transform (list or None): the list of transformations that will be applied on the image,
+            which is obtained by `create_transforms`. If None, the default imagenet transformation
+            for evaluation will be applied. Default: None.
+        target_transform (list or None): the list of transformations that will be applied on the label.
+            If None, the label will be converted to the type of ms.int32. Default: None.
+        num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
+            (default=None).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
+        separate(bool, optional): whether to return the original image and the transformed image separately
+
+    Note:
+        1. cutmix is currently experimental (which means the performance gain is not guaranteed)
+            and cannot be used together with mixup due to the label int type conflict.
+        2. `is_training`, `mixup`, `num_classes` are used for MixUp, which is a kind of transform operation.
+          However, we are not able to merge it into `transform`, due to the limitations of the `mindspore.dataset` API.
+
+
+    Returns:
+        BatchDataset, dataset batched.
+    """
+
+    if target_transform is None:
+        target_transform = transforms.TypeCast(ms.int32)
+    target_input_columns = "label" if "label" in dataset.get_col_names() else "fine_label"
+    dataset = dataset.map(
+        operations=target_transform,
+        input_columns=target_input_columns,
+        num_parallel_workers=num_parallel_workers,
+        python_multiprocessing=python_multiprocessing,
+    )
+
+    if transform is None:
+        warnings.warn(
+            "Using None as the default value of transform will set it back to "
+            "traditional image transform, which is not recommended. "
+            "You should explicitly call `create_transforms` and pass it to `create_loader`."
+        )
+        transform = create_transforms("imagenet", is_training=False)
+
+    # only apply augment splits to train dataset
+    if separate and is_training:
+        assert isinstance(transform, tuple) and len(transform) == 3
+
+        # Note: mindspore-2.0 delete the parameter column_order
+        sig = inspect.signature(dataset.map)
+        pass_column_order = False if "kwargs" in sig.parameters else True
+
+        # map all the transform
+        dataset = map_transform_splits(
+            dataset, transform, num_parallel_workers, python_multiprocessing, pass_column_order
+        )
+        # after batch, datasets has 4 columns
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+        # concat the 3 columns of image
+        dataset = dataset.map(
+            operations=concat_per_batch_map,
+            input_columns=["image_clean", "image_aug1", "image_aug2", "label"],
+            output_columns=["image", "label"],
+            column_order=["image", "label"] if pass_column_order else None,
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+    else:
+        dataset = dataset.map(
+            operations=transform,
+            input_columns="image",
+            num_parallel_workers=num_parallel_workers,
+            python_multiprocessing=python_multiprocessing,
+        )
+
+        dataset = dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
+
+    if is_training:
+        if (mixup + cutmix > 0.0) and batch_size > 1:
+            # TODO: use mindspore vision cutmix and mixup after the confliction fixed in later release
+            # set label_smoothing 0 here since label smoothing is computed in loss module
+            mixup_fn = Mixup(
+                mixup_alpha=mixup,
+                cutmix_alpha=cutmix,
+                cutmix_minmax=None,
+                prob=cutmix_prob,
+                switch_prob=0.5,
+                label_smoothing=0.0,
+                num_classes=num_classes,
+            )
+            # images in a batch are mixed. labels are converted soft onehot labels.
+            dataset = dataset.map(
+                operations=mixup_fn,
+                input_columns=["image", target_input_columns],
+                num_parallel_workers=num_parallel_workers,
+            )
+
+    return dataset
+
+
+
+ +
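A minimal end-to-end sketch chaining create_dataset, create_transforms, and create_loader (paths and hyperparameters are illustrative):

from mindcv.data.dataset_factory import create_dataset
from mindcv.data.loader import create_loader
from mindcv.data.transforms_factory import create_transforms

dataset = create_dataset(name="cifar10", root="./datasets/cifar10", split="train", download=True)
transform = create_transforms(dataset_name="cifar10", image_resize=224, is_training=True)
loader = create_loader(
    dataset,
    batch_size=32,
    drop_remainder=True,
    is_training=True,
    num_classes=10,
    transform=transform,
)
for images, labels in loader.create_tuple_iterator():
    print(images.shape, labels.shape)
    break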

MixUp

+ + +
+ + + +

+ mindcv.data.mixup.Mixup + + +

+ + +
+ + +

Mixup/Cutmix that applies different params to each element or whole batch

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
mixup_alpha +
+

mixup alpha value, mixup is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
cutmix_alpha +
+

cutmix alpha value, cutmix is active if > 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
cutmix_minmax +
+

cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.

+
+

+ + TYPE: + List[float] + + + DEFAULT: + None + +

+
prob +
+

probability of applying mixup or cutmix per batch or element

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
switch_prob +
+

probability of switching to cutmix instead of mixup when both are active

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
mode +
+

how to apply mixup/cutmix params: per 'batch', 'pair' (pair of elements), or 'elem' (element)

+
+

+ + TYPE: + str + + + DEFAULT: + 'batch' + +

+
correct_lam +
+

apply lambda correction when cutmix bbox clipped by image borders

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
label_smoothing +
+

apply label smoothing to the mixed target tensor

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
num_classes +
+

number of classes for target

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/data/mixup.py +
 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
class Mixup:
+    """Mixup/Cutmix that applies different params to each element or whole batch
+
+    Args:
+        mixup_alpha (float): mixup alpha value, mixup is active if > 0.
+        cutmix_alpha (float): cutmix alpha value, cutmix is active if > 0.
+        cutmix_minmax (List[float]): cutmix min/max image ratio, cutmix is active and uses this vs alpha if not None.
+        prob (float): probability of applying mixup or cutmix per batch or element
+        switch_prob (float): probability of switching to cutmix instead of mixup when both are active
+        mode (str): how to apply mixup/cutmix params: per 'batch', 'pair' (pair of elements), or 'elem' (element)
+        correct_lam (bool): apply lambda correction when cutmix bbox clipped by image borders
+        label_smoothing (float): apply label smoothing to the mixed target tensor
+        num_classes (int): number of classes for target
+    """
+
+    def __init__(
+        self,
+        mixup_alpha=1.0,
+        cutmix_alpha=0.0,
+        cutmix_minmax=None,
+        prob=1.0,
+        switch_prob=0.5,
+        mode="batch",
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000,
+    ):
+        self.mixup_alpha = mixup_alpha
+        self.cutmix_alpha = cutmix_alpha
+        self.cutmix_minmax = cutmix_minmax
+        if self.cutmix_minmax is not None:
+            assert len(self.cutmix_minmax) == 2
+            # force cutmix alpha == 1.0 when minmax active to keep logic simple & safe
+            self.cutmix_alpha = 1.0
+        self.mix_prob = prob
+        self.switch_prob = switch_prob
+        self.label_smoothing = label_smoothing
+        self.num_classes = num_classes
+        self.mode = mode
+        self.correct_lam = correct_lam  # correct lambda based on clipped area for cutmix
+        self.mixup_enabled = True  # set false to disable mixing (intended to be set by train loop)
+
+    def _params_per_elem(self, batch_size):
+        """_params_per_elem"""
+        lam = np.ones(batch_size, dtype=np.float32)
+        use_cutmix = np.zeros(batch_size, dtype=np.bool)
+        if self.mixup_enabled:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand(batch_size) < self.switch_prob
+                lam_mix = np.where(
+                    use_cutmix,
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size),
+                    np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size),
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha, size=batch_size)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = np.ones(batch_size, dtype=np.bool)
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha, size=batch_size)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = np.where(np.random.rand(batch_size) < self.mix_prob, lam_mix.astype(np.float32), lam)
+        return lam, use_cutmix
+
+    def _params_per_batch(self):
+        """_params_per_batch"""
+        lam = 1.0
+        use_cutmix = False
+        if self.mixup_enabled and np.random.rand() < self.mix_prob:
+            if self.mixup_alpha > 0.0 and self.cutmix_alpha > 0.0:
+                use_cutmix = np.random.rand() < self.switch_prob
+                lam_mix = (
+                    np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+                    if use_cutmix
+                    else np.random.beta(self.mixup_alpha, self.mixup_alpha)
+                )
+            elif self.mixup_alpha > 0.0:
+                lam_mix = np.random.beta(self.mixup_alpha, self.mixup_alpha)
+            elif self.cutmix_alpha > 0.0:
+                use_cutmix = True
+                lam_mix = np.random.beta(self.cutmix_alpha, self.cutmix_alpha)
+            else:
+                assert False, "One of mixup_alpha > 0., cutmix_alpha > 0., cutmix_minmax not None should be true."
+            lam = float(lam_mix)
+        return lam, use_cutmix
+
+    def _mix_elem(self, x):
+        """_mix_elem"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_pair(self, x):
+        """_mix_pair"""
+        batch_size = len(x)
+        lam_batch, use_cutmix = self._params_per_elem(batch_size // 2)
+        x_orig = x.clone()  # need to keep an unmodified original for mixing source
+        for i in range(batch_size // 2):
+            j = batch_size - i - 1
+            lam = lam_batch[i]
+            if lam != 1.0:
+                if use_cutmix[i]:
+                    (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                        x[i].shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+                    )
+                    x[i][:, yl:yh, xl:xh] = x_orig[j][:, yl:yh, xl:xh]
+                    x[j][:, yl:yh, xl:xh] = x_orig[i][:, yl:yh, xl:xh]
+                    lam_batch[i] = lam
+                else:
+                    x[i] = x[i] * lam + x_orig[j] * (1 - lam)
+                    x[j] = x[j] * lam + x_orig[i] * (1 - lam)
+        lam_batch = np.concatenate((lam_batch, lam_batch[::-1]))
+        return P.ExpandDims()(Tensor(lam_batch, dtype=mstype.float32), 1)
+
+    def _mix_batch(self, x):
+        """_mix_batch"""
+        lam, use_cutmix = self._params_per_batch()
+        if lam == 1.0:
+            return 1.0
+        if use_cutmix:
+            (yl, yh, xl, xh), lam = cutmix_bbox_and_lam(
+                x.shape, lam, ratio_minmax=self.cutmix_minmax, correct_lam=self.correct_lam
+            )
+            x[:, :, yl:yh, xl:xh] = np.flip(x, axis=0)[:, :, yl:yh, xl:xh]
+        else:
+            x_flipped = np.flip(x, axis=0) * (1.0 - lam)
+            x *= lam
+            x += x_flipped
+        return lam
+
+    def __call__(self, x, target):
+        """Mixup apply"""
+        # the same to image, label
+        assert len(x) % 2 == 0, "Batch size should be even when using this"
+        if self.mode == "elem":
+            lam = self._mix_elem(x)
+        elif self.mode == "pair":
+            lam = self._mix_pair(x)
+        else:
+            lam = self._mix_batch(x)
+        target = mixup_target(target, self.num_classes, lam, self.label_smoothing)
+        return x.astype(np.float32), target.astype(np.float32)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
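A minimal sketch of applying Mixup to a NumPy batch, matching how create_loader maps it over batched columns (shapes and values are illustrative; the mixup_target helper used internally is not shown in this section):

import numpy as np

from mindcv.data.mixup import Mixup

mixup_fn = Mixup(mixup_alpha=0.8, cutmix_alpha=0.0, prob=1.0, label_smoothing=0.0, num_classes=10)
images = np.random.rand(4, 3, 32, 32).astype(np.float32)  # batch size must be even
labels = np.array([0, 1, 2, 3])
mixed_images, soft_labels = mixup_fn(images, labels)
print(mixed_images.shape, soft_labels.shape)  # expected: (4, 3, 32, 32) (4, 10)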

Transform Factory

+ + + +
+ + + +

+mindcv.data.transforms_factory.create_transforms(dataset_name='', image_resize=224, is_training=False, auto_augment=None, separate=False, **kwargs) + +

+ + +
+ +

Creates a list of transform operation on image data.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
dataset_name +
+

if '', a customized dataset is assumed and the same transform pipeline as ImageNet is currently applied. If a standard dataset name is given, including imagenet, cifar10, and mnist, preset transforms will be returned. Default: ''.

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
image_resize +
+

the image size after resize for adapting to network. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
is_training +
+

if True, augmentation will be applied if support. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
separate +
+

whether to return the original image and the transformed image separately.

+
+

+ + DEFAULT: + False + +

+
**kwargs +
+

additional args passed to transforms_imagenet_train and transforms_imagenet_eval

+
+

+ + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

A list of transformation operations

+
+
+ +
+ Source code in mindcv/data/transforms_factory.py +
182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
def create_transforms(
+    dataset_name="",
+    image_resize=224,
+    is_training=False,
+    auto_augment=None,
+    separate=False,
+    **kwargs,
+):
+    r"""Creates a list of transform operation on image data.
+
+    Args:
+        dataset_name (str): if '', a customized dataset is assumed and the same transform pipeline as ImageNet is applied.
+            If a standard dataset name is given, including imagenet, cifar10, and mnist, preset transforms will be returned.
+            Default: ''.
+        image_resize (int): the image size after resize for adapting to network. Default: 224.
+        is_training (bool): if True, augmentation will be applied if support. Default: False.
+        auto_augment(str): augmentation strategies, such as "augmix", "autoaug", etc.
+        separate: whether to return the original image and the transformed image separately.
+        **kwargs: additional args passed to `transforms_imagenet_train` and `transforms_imagenet_eval`
+
+    Returns:
+        A list of transformation operations
+    """
+
+    dataset_name = dataset_name.lower()
+
+    if dataset_name in ("imagenet", ""):
+        trans_args = dict(image_resize=image_resize, **kwargs)
+        if is_training:
+            return transforms_imagenet_train(auto_augment=auto_augment, separate=separate, **trans_args)
+
+        return transforms_imagenet_eval(**trans_args)
+    elif dataset_name in ("cifar10", "cifar100"):
+        trans_list = transforms_cifar(resize=image_resize, is_training=is_training)
+        return trans_list
+    elif dataset_name == "mnist":
+        trans_list = transforms_mnist(resize=image_resize)
+        return trans_list
+    else:
+        raise NotImplementedError(
+            f"Only supports creating transforms for ['imagenet'] datasets, but got {dataset_name}."
+        )
+
+
+
+ +
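A minimal usage sketch (the auto_augment config string is illustrative):

from mindcv.data.transforms_factory import create_transforms

# ImageNet-style training transforms with RandAugment enabled
train_transforms = create_transforms(
    dataset_name="imagenet",
    image_resize=224,
    is_training=True,
    auto_augment="randaug-m7-mstd0.5",
)
# evaluation transforms for the same dataset
eval_transforms = create_transforms(dataset_name="imagenet", image_resize=224, is_training=False)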
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/loss/index.html b/zh/reference/loss/index.html new file mode 100644 index 000000000..d8dfb9036 --- /dev/null +++ b/zh/reference/loss/index.html @@ -0,0 +1,1994 @@ + + + + + + + + + + + + + + + + + + + + + + + + loss - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Loss

+

Loss Factory

+ + + +
+ + + +

+mindcv.loss.loss_factory.create_loss(name='CE', weight=None, reduction='mean', label_smoothing=0.0, aux_factor=0.0) + +

+ + +
+ +

Creates loss function

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

loss name : 'CE' for cross_entropy. 'BCE': binary cross entropy. Default: 'CE'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'CE' + +

+
weight +
+

Class weight. A rescaling weight given to the loss of each batch element. +If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. +By default, the sum of the output will be divided by the number of elements in the output. +'sum': the output will be summed. Default:'mean'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'mean' + +

+
label_smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
+ +
+ Inputs +
    +
  • logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples, + C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits) + for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
  • +
  • labels (Tensor): Ground truth labels. Shape: [N] or [N, C]. + (1) If in shape [N], sparse labels representing the class indices. Must be int type. + (2) shape [N, C], dense labels representing the ground truth class probability values, + or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
  • +
+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Loss function to compute the loss between the input logits and labels.

+
+
+ +
+ Source code in mindcv/loss/loss_factory.py +
14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
def create_loss(
+    name: str = "CE",
+    weight: Optional[Tensor] = None,
+    reduction: str = "mean",
+    label_smoothing: float = 0.0,
+    aux_factor: float = 0.0,
+):
+    r"""Creates loss function
+
+    Args:
+        name (str):  loss name : 'CE' for cross_entropy. 'BCE': binary cross entropy. Default: 'CE'.
+        weight (Tensor): Class weight. A rescaling weight given to the loss of each batch element.
+            If given, has to be a Tensor of size 'nbatch'. Data type must be float16 or float32.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'.
+            By default, the sum of the output will be divided by the number of elements in the output.
+            'sum': the output will be summed. Default:'mean'.
+        label_smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor (float): Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3. Default: 0.0.
+
+    Inputs:
+        - logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N means the number of samples,
+            C means number of classes. Tuple of two input logits are supported in order (main_logits, aux_logits)
+            for auxiliary loss used in networks like inception_v3. Data type must be float16 or float32.
+        - labels (Tensor): Ground truth labels. Shape: [N] or [N, C].
+            (1) If in shape [N], sparse labels representing the class indices. Must be int type.
+            (2) shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type. If the loss type is BCE, the shape of labels must be [N, C].
+
+    Returns:
+       Loss function to compute the loss between the input logits and labels.
+    """
+    name = name.lower()
+
+    if name == "ce":
+        loss = CrossEntropySmooth(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    elif name == "bce":
+        loss = BinaryCrossEntropySmooth(
+            smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight, pos_weight=None
+        )
+    elif name == "asl_single_label":
+        loss = AsymmetricLossSingleLabel(smoothing=label_smoothing)
+    elif name == "asl_multi_label":
+        loss = AsymmetricLossMultilabel()
+    elif name == "jsd":
+        loss = JSDCrossEntropy(smoothing=label_smoothing, aux_factor=aux_factor, reduction=reduction, weight=weight)
+    else:
+        raise NotImplementedError
+
+    return loss
+
+
+
+ +
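A minimal usage sketch, assuming a MindSpore environment (shapes and values are illustrative):

import numpy as np
import mindspore as ms

from mindcv.loss.loss_factory import create_loss

loss_fn = create_loss(name="CE", label_smoothing=0.1, aux_factor=0.0)
logits = ms.Tensor(np.random.randn(4, 10), ms.float32)  # [N, C]
labels = ms.Tensor(np.array([1, 0, 3, 2]), ms.int32)    # sparse class indices, [N]
loss = loss_fn(logits, labels)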

Cross Entropy

+ + +
+ + + +

+ mindcv.loss.cross_entropy_smooth.CrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Cross entropy loss with label smoothing. Applies the softmax activation function to the input logits, and uses the given logits to compute the cross entropy between the logits and the label.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element. +Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes. + Tuple composed of multiple logits are supported in order (main_logits, aux_logits) + for auxiliary loss used in networks like inception_v3. +labels (Tensor): Ground truth label. Shape: [N] or [N, C]. + (1) Shape (N), sparse labels representing the class indices. Must be int type. + (2) Shape [N, C], dense labels representing the ground truth class probability values, + or the one-hot labels. Must be float type.

+
+
+ Source code in mindcv/loss/cross_entropy_smooth.py +
 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
class CrossEntropySmooth(nn.LossBase):
+    """
+    Cross entropy loss with label smoothing.
+    Applies the softmax activation function to the input `logits`, and uses the given logits to compute the cross entropy
+    between the logits and the label.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. Shape [C]. A rescaling weight applied to the loss of each batch element.
+            Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): Input logits. Shape [N, C], where N is # samples, C is # classes.
+            Tuple composed of multiple logits are supported in order (main_logits, aux_logits)
+            for auxiliary loss used in networks like inception_v3.
+        labels (Tensor): Ground truth label. Shape: [N] or [N, C].
+            (1) Shape (N), sparse labels representing the class indices. Must be int type.
+            (2) Shape [N, C], dense labels representing the ground truth class probability values,
+            or the one-hot labels. Must be float type.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+            for aux in logits[1:]:
+                if self.aux_factor > 0:
+                    loss_aux += F.cross_entropy(
+                        aux, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+                    )
+        else:
+            main_logits = logits
+
+        loss_logits = F.cross_entropy(
+            main_logits, labels, weight=self.weight, reduction=self.reduction, label_smoothing=self.smoothing
+        )
+        loss = loss_logits + self.aux_factor * loss_aux
+        return loss
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
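A minimal sketch of the auxiliary-logit case, assuming a model with one auxiliary head (shapes and values are illustrative):

import numpy as np
import mindspore as ms

from mindcv.loss.cross_entropy_smooth import CrossEntropySmooth

loss_fn = CrossEntropySmooth(smoothing=0.1, aux_factor=0.4)
main_logits = ms.Tensor(np.random.randn(4, 10), ms.float32)
aux_logits = ms.Tensor(np.random.randn(4, 10), ms.float32)
labels = ms.Tensor(np.array([1, 0, 3, 2]), ms.int32)
loss = loss_fn((main_logits, aux_logits), labels)  # tuple input: (main_logits, aux_logits)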

Binary Cross Entropy

+ + +
+ + + +

+ mindcv.loss.binary_cross_entropy_smooth.BinaryCrossEntropySmooth + + +

+ + +
+

+ Bases: nn.LossBase

+ + +

Binary cross entropy loss with label smoothing. Applies the sigmoid activation function to the input logits, and uses the given logits to compute the binary cross entropy between the logits and the label.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
smoothing +
+

Label smoothing factor, a regularization tool used to prevent the model +from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
aux_factor +
+

Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs +(i.e., deep supervision), like inception_v3. Default: 0.0.

+
+

+ + DEFAULT: + 0.0 + +

+
reduction +
+

Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.

+
+

+ + DEFAULT: + 'mean' + +

+
weight +
+

Class weight. A rescaling weight applied to the loss of each batch element. Shape [C]. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
pos_weight +
+

Positive weight for each class. A weight of positive examples. Shape [C]. +Must be a vector with length equal to the number of classes. +It can be broadcast to a tensor with shape of logits. Data type must be float16 or float32.

+
+

+ + TYPE: + Tensor + + + DEFAULT: + None + +

+
+ +
+ Inputs +

logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes. + Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss. +labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as logits or (2) shape [N]. + can be a class probability matrix or one-hot labels. Data type must be float16 or float32.

+
+
+ Source code in mindcv/loss/binary_cross_entropy_smooth.py +
 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
class BinaryCrossEntropySmooth(nn.LossBase):
+    """
+    Binary cross entropy loss with label smoothing.
+    Applies the sigmoid activation function to the input `logits`, and uses the given logits to compute the binary cross entropy
+    between the logits and the label.
+
+    Args:
+        smoothing: Label smoothing factor, a regularization tool used to prevent the model
+            from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: 0.0.
+        aux_factor: Auxiliary loss factor. Set aux_factor > 0.0 if the model has auxiliary logit outputs
+            (i.e., deep supervision), like inception_v3.  Default: 0.0.
+        reduction: Apply specific reduction method to the output: 'mean' or 'sum'. Default: 'mean'.
+        weight (Tensor): Class weight. A rescaling weight applied to the loss of each batch element. Shape [C].
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+        pos_weight (Tensor): Positive weight for each class. A weight of positive examples. Shape [C].
+            Must be a vector with length equal to the number of classes.
+            It can be broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
+
+    Inputs:
+        logits (Tensor or Tuple of Tensor): (1) Input logits. Shape [N, C], where N is # samples, C is # classes.
+            Or (2) Tuple of two input logits (main_logits and aux_logits) for auxiliary loss.
+        labels (Tensor): Ground truth label, (1) shape [N, C], has the same shape as `logits` or (2) shape [N].
+            can be a class probability matrix or one-hot labels. Data type must be float16 or float32.
+    """
+
+    def __init__(self, smoothing=0.0, aux_factor=0.0, reduction="mean", weight=None, pos_weight=None):
+        super().__init__()
+        self.smoothing = smoothing
+        self.aux_factor = aux_factor
+        self.reduction = reduction
+        self.weight = weight
+        self.pos_weight = pos_weight
+        self.ones = P.OnesLike()
+        self.one_hot = P.OneHot()
+
+    def construct(self, logits, labels):
+        loss_aux = 0
+        aux_logits = None
+
+        if isinstance(logits, tuple):
+            main_logits = logits[0]
+        else:
+            main_logits = logits
+
+        if main_logits.size != labels.size:
+            # We must explicitly convert the label to one-hot,
+            # for binary_cross_entropy_with_logits restricting input and label have the same shape.
+            class_dim = 0 if main_logits.ndim == 1 else 1
+            n_classes = main_logits.shape[class_dim]
+            labels = self.one_hot(labels, n_classes, Tensor(1.0), Tensor(0.0))
+
+        ones_input = self.ones(main_logits)
+        if self.weight is not None:
+            weight = self.weight
+        else:
+            weight = ones_input
+        if self.pos_weight is not None:
+            pos_weight = self.pos_weight
+        else:
+            pos_weight = ones_input
+
+        if self.smoothing > 0.0:
+            class_dim = 0 if main_logits.ndim == 1 else -1
+            n_classes = main_logits.shape[class_dim]
+            labels = labels * (1 - self.smoothing) + self.smoothing / n_classes
+
+        if self.aux_factor > 0 and isinstance(logits, tuple):  # auxiliary logits are the remaining tuple entries
+            for aux_logits in logits[1:]:
+                loss_aux += F.binary_cross_entropy_with_logits(
+                    aux_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+                )
+        # else:
+        #    warnings.warn("There are logit tuple input, but the auxiliary loss factor is 0.")
+
+        loss_logits = F.binary_cross_entropy_with_logits(
+            main_logits, labels, weight=weight, pos_weight=pos_weight, reduction=self.reduction
+        )
+
+        loss = loss_logits + self.aux_factor * loss_aux
+
+        return loss
+
+
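A minimal usage sketch of the loss documented above (not part of the generated page). The import path follows the module name shown in the heading; the batch size, class count, and smoothing value are illustrative.

```python
# Illustrative only: shapes and hyper-parameters are arbitrary.
import numpy as np
import mindspore as ms
from mindcv.loss.binary_cross_entropy_smooth import BinaryCrossEntropySmooth

logits = ms.Tensor(np.random.randn(4, 10), ms.float32)    # [N, C] raw scores
labels = ms.Tensor(np.eye(10)[[1, 3, 5, 7]], ms.float32)  # one-hot targets, same shape as logits

loss_fn = BinaryCrossEntropySmooth(smoothing=0.1, reduction="mean")
loss = loss_fn(logits, labels)   # scalar Tensor
print(loss)
```

When a backbone returns `(main_logits, aux_logits)`, pass the tuple directly and set `aux_factor > 0` so the auxiliary term is weighted into the total loss, as described in the parameter table above.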
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/models.layers/index.html b/zh/reference/models.layers/index.html new file mode 100644 index 000000000..71145e913 --- /dev/null +++ b/zh/reference/models.layers/index.html @@ -0,0 +1,2976 @@ + + + + + + + + + + + + + + + + + + + + + + + + models.layers - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Common Layers in Model

+

Activation

+ + +
+ + + +

+ mindcv.models.layers.activation.Swish + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Swish activation function: x * sigmoid(x).

+ +
+ Return +

Tensor

+
+
+ Example +
+
+
+

x = Tensor(((20, 16), (50, 50)), mindspore.float32) +Swish()(x)

+
+
+
+
+
+ Source code in mindcv/models/layers/activation.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
class Swish(nn.Cell):
+    """
+    Swish activation function: x * sigmoid(x).
+
+    Args:
+        None
+
+    Return:
+        Tensor
+
+    Example:
+        >>> x = Tensor(((20, 16), (50, 50)), mindspore.float32)
+        >>> Swish()(x)
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.result = None
+        self.sigmoid = nn.Sigmoid()
+
+    def construct(self, x):
+        result = x * self.sigmoid(x)
+        return result
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

DropPath

+ + +
+ + + +

+ mindcv.models.layers.drop_path.DropPath + + +

+ + +
+

+ Bases: nn.Cell

+ + +

DropPath (Stochastic Depth) regularization layers

+ +
+ Source code in mindcv/models/layers/drop_path.py +
12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
class DropPath(nn.Cell):
+    """DropPath (Stochastic Depth) regularization layers"""
+
+    def __init__(
+        self,
+        drop_prob: float = 0.0,
+        scale_by_keep: bool = True,
+    ) -> None:
+        super().__init__()
+        self.keep_prob = 1.0 - drop_prob
+        self.scale_by_keep = scale_by_keep
+        self.dropout = Dropout(p=drop_prob)
+
+    def construct(self, x: Tensor) -> Tensor:
+        if self.keep_prob == 1.0 or not self.training:
+            return x
+        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+        random_tensor = self.dropout(ones(shape))
+        if not self.scale_by_keep:
+            random_tensor = ops.mul(random_tensor, self.keep_prob)
+        return x * random_tensor
+
+
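A short sketch (illustrative, not part of the page) of the train/eval behaviour shown in the listing: DropPath zeroes whole samples during training and is an identity at inference.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.drop_path import DropPath

x = ms.Tensor(np.ones((4, 8)), ms.float32)
drop_path = DropPath(drop_prob=0.5)

drop_path.set_train(True)
y_train = drop_path(x)   # some rows zeroed, survivors scaled by 1 / keep_prob

drop_path.set_train(False)
y_eval = drop_path(x)    # identity: input returned unchanged
```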
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Identity

+ + +
+ + + +

+ mindcv.models.layers.identity.Identity + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Identity

+ +
+ Source code in mindcv/models/layers/identity.py +
5
+6
+7
+8
+9
class Identity(nn.Cell):
+    """Identity"""
+
+    def construct(self, x):
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

MLP

+ + +
+ + + +

+ mindcv.models.layers.mlp.Mlp + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/layers/mlp.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
class Mlp(nn.Cell):
+    def __init__(
+        self,
+        in_features: int,
+        hidden_features: Optional[int] = None,
+        out_features: Optional[int] = None,
+        act_layer: Optional[nn.Cell] = nn.GELU,
+        drop: float = 0.0,
+    ) -> None:
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Dense(in_channels=in_features, out_channels=hidden_features, has_bias=True)
+        self.act = act_layer()
+        self.fc2 = nn.Dense(in_channels=hidden_features, out_channels=out_features, has_bias=True)
+        self.drop = Dropout(p=drop)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
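A minimal sketch of the block above (feature sizes are illustrative): `out_features` defaults to `in_features`, so the block maps token features back to their input width.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.mlp import Mlp

mlp = Mlp(in_features=64, hidden_features=256, drop=0.0)
tokens = ms.Tensor(np.random.randn(2, 196, 64), ms.float32)  # (batch, tokens, channels)
out = mlp(tokens)
print(out.shape)  # (2, 196, 64)
```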
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Patch Embedding

+ + +
+ + + +

+ mindcv.models.layers.patch_embed.PatchEmbed + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Image to Patch Embedding

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Image size. Default: 224.

+
+

+ + TYPE: + int + + + DEFAULT: + 224 + +

+
patch_size +
+

Patch token size. Default: 4.

+
+

+ + TYPE: + int + + + DEFAULT: + 4 + +

+
in_chans +
+

Number of input image channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
embed_dim +
+

Number of linear projection output channels. Default: 96.

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
norm_layer +
+

Normalization layer. Default: None

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/layers/patch_embed.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
class PatchEmbed(nn.Cell):
+    """Image to Patch Embedding
+
+    Args:
+        image_size (int): Image size.  Default: 224.
+        patch_size (int): Patch token size. Default: 4.
+        in_chans (int): Number of input image channels. Default: 3.
+        embed_dim (int): Number of linear projection output channels. Default: 96.
+        norm_layer (nn.Cell, optional): Normalization layer. Default: None
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        embed_dim: int = 96,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        image_size = to_2tuple(image_size)
+        patch_size = to_2tuple(patch_size)
+        patches_resolution = [image_size[0] // patch_size[0], image_size[1] // patch_size[1]]
+        self.image_size = image_size
+        self.patch_size = patch_size
+        self.patches_resolution = patches_resolution
+        self.num_patches = patches_resolution[0] * patches_resolution[1]
+
+        self.in_chans = in_chans
+        self.embed_dim = embed_dim
+
+        self.proj = nn.Conv2d(in_channels=in_chans, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size,
+                              pad_mode='pad', has_bias=True, weight_init="TruncatedNormal")
+
+        if norm_layer is not None:
+            if isinstance(embed_dim, int):
+                embed_dim = (embed_dim,)
+            self.norm = norm_layer(embed_dim, epsilon=1e-5)
+        else:
+            self.norm = None
+
+    def construct(self, x: Tensor) -> Tensor:
+        """docstring"""
+        B = x.shape[0]
+        # FIXME look at relaxing size constraints
+        x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+        x = ops.Transpose()(x, (0, 2, 1))
+
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+
+
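A small sketch of the embedding above (shapes illustrative): a 224x224 image with `patch_size=4` yields 56x56 = 3136 patch tokens of dimension `embed_dim`.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.patch_embed import PatchEmbed

patch_embed = PatchEmbed(image_size=224, patch_size=4, in_chans=3, embed_dim=96)
images = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
tokens = patch_embed(images)
print(patch_embed.num_patches, tokens.shape)  # 3136 (1, 3136, 96)
```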
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.layers.patch_embed.PatchEmbed.construct(x) + +

+ + +
+ +

Split the image into patches and embed them as a sequence of shape (B, num_patches, embed_dim).

+ +
+ Source code in mindcv/models/layers/patch_embed.py +
51
+52
+53
+54
+55
+56
+57
+58
+59
+60
def construct(self, x: Tensor) -> Tensor:
+    """docstring"""
+    B = x.shape[0]
+    # FIXME look at relaxing size constraints
+    x = ops.Reshape()(self.proj(x), (B, self.embed_dim, -1))  # B Ph*Pw C
+    x = ops.Transpose()(x, (0, 2, 1))
+
+    if self.norm is not None:
+        x = self.norm(x)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +

Pooling

+ + +
+ + + +

+ mindcv.models.layers.pooling.GlobalAvgPooling + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1

+ +
+ Source code in mindcv/models/layers/pooling.py +
 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
class GlobalAvgPooling(nn.Cell):
+    """
+    GlobalAvgPooling, same as torch.nn.AdaptiveAvgPool2d when output shape is 1
+    """
+
+    def __init__(self, keep_dims: bool = False) -> None:
+        super().__init__()
+        self.keep_dims = keep_dims
+
+    def construct(self, x):
+        x = ops.mean(x, axis=(2, 3), keep_dims=self.keep_dims)
+        return x
+
+
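A quick sketch of the pooling layer above (shapes illustrative): it averages over the spatial dimensions, optionally keeping them as size-1 axes.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.pooling import GlobalAvgPooling

x = ms.Tensor(np.random.randn(2, 512, 7, 7), ms.float32)
print(GlobalAvgPooling()(x).shape)                # (2, 512)
print(GlobalAvgPooling(keep_dims=True)(x).shape)  # (2, 512, 1, 1)
```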
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Selective Kernel

+ + +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernelAttn + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Attention Module +Selective Kernel attention mechanism factored out into its own module.

+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
class SelectiveKernelAttn(nn.Cell):
+    """Selective Kernel Attention Module
+    Selective Kernel attention mechanism factored out into its own module.
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        num_paths: int = 2,
+        attn_channels: int = 32,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        self.num_paths = num_paths
+        self.mean = GlobalAvgPooling(keep_dims=True)
+        self.fc_reduce = nn.Conv2d(channels, attn_channels, kernel_size=1, has_bias=False)
+        self.bn = norm(attn_channels)
+        self.act = activation()
+        self.fc_select = nn.Conv2d(attn_channels, channels * num_paths, kernel_size=1)
+        self.softmax = nn.Softmax(axis=1)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.mean((x.sum(1)))
+        x = self.fc_reduce(x)
+        x = self.bn(x)
+        x = self.act(x)
+        x = self.fc_select(x)
+        b, c, h, w = x.shape
+        x = x.reshape((b, self.num_paths, c // self.num_paths, h, w))
+        x = self.softmax(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + +

+ mindcv.models.layers.selective_kernel.SelectiveKernel + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Selective Kernel Convolution Module +As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications. +Largest change is the input split, which divides the input channels across each convolution path; this can +be viewed as a grouping of sorts, but the output channel counts expand to the module level value. This keeps +the parameter count from ballooning when the convolutions themselves don't have groups, but still provides +a noteworthy increase in performance over similar param count models without this attention layer. -Ross W

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

module input (feature) channel count

+
+

+ + TYPE: + int + +

+
out_channels +
+

module output (feature) channel count

+
+

+ + TYPE: + int + + + DEFAULT: + None + +

+
kernel_size +
+

kernel size for each convolution branch

+
+

+ + TYPE: + (int, list) + + + DEFAULT: + None + +

+
stride +
+

stride for convolutions

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
dilation +
+

dilation for module as a whole, impacts dilation of each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
groups +
+

number of groups for each branch

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
rd_ratio +
+

reduction factor for attention features

+
+

+ + TYPE: + (int, float) + + + DEFAULT: + 1.0 / 16 + +

+
rd_channels(int) +
+

reduction channels can be specified directly by arg (if rd_channels is set)

+
+

+

+
rd_divisor(int) +
+

divisor used to round the attention (reduction) channel count so it stays divisible. Default: 8.

+
+

+

+
keep_3x3 +
+

keep all branch convolution kernels as 3x3, changing larger kernels for dilations

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
split_input +
+

split input channels evenly across each convolution branch, keeps param count lower, +can be viewed as grouping by path, output expands to module out_channels count

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
activation +
+

activation layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.ReLU + +

+
norm +
+

batchnorm/norm layer to use

+
+

+ + TYPE: + nn.Module + + + DEFAULT: + nn.BatchNorm2d + +

+
+ +
+ Source code in mindcv/models/layers/selective_kernel.py +
 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
class SelectiveKernel(nn.Cell):
+    """Selective Kernel Convolution Module
+    As described in Selective Kernel Networks (https://arxiv.org/abs/1903.06586) with some modifications.
+    Largest change is the input split, which divides the input channels across each convolution path; this can
+    be viewed as a grouping of sorts, but the output channel counts expand to the module level value. This keeps
+    the parameter count from ballooning when the convolutions themselves don't have groups, but still provides
+    a noteworthy increase in performance over similar param count models without this attention layer. -Ross W
+    Args:
+        in_channels (int):  module input (feature) channel count
+        out_channels (int):  module output (feature) channel count
+        kernel_size (int, list): kernel size for each convolution branch
+        stride (int): stride for convolutions
+        dilation (int): dilation for module as a whole, impacts dilation of each branch
+        groups (int): number of groups for each branch
+        rd_ratio (int, float): reduction factor for attention features
+        rd_channels(int): reduction channels can be specified directly by arg (if rd_channels is set)
+        rd_divisor(int): divisor used to round the attention (reduction) channel count so it stays divisible. Default: 8.
+        keep_3x3 (bool): keep all branch convolution kernels as 3x3, changing larger kernels for dilations
+        split_input (bool): split input channels evenly across each convolution branch, keeps param count lower,
+            can be viewed as grouping by path, output expands to module out_channels count
+        activation (nn.Module): activation layer to use
+        norm (nn.Module): batchnorm/norm layer to use
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: Optional[int] = None,
+        kernel_size: Optional[Union[int, List]] = None,
+        stride: int = 1,
+        dilation: int = 1,
+        groups: int = 1,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        keep_3x3: bool = True,
+        split_input: bool = True,
+        activation: Optional[nn.Cell] = nn.ReLU,
+        norm: Optional[nn.Cell] = nn.BatchNorm2d,
+    ):
+        super().__init__()
+        out_channels = out_channels or in_channels
+        kernel_size = kernel_size or [3, 5]  # default to one 3x3 and one 5x5 branch. 5x5 -> 3x3 + dilation
+        _kernel_valid(kernel_size)
+        if not isinstance(kernel_size, list):
+            kernel_size = [kernel_size] * 2
+        if keep_3x3:
+            dilation = [dilation * (k - 1) // 2 for k in kernel_size]
+            kernel_size = [3] * len(kernel_size)
+        else:
+            dilation = [dilation] * len(kernel_size)
+        self.num_paths = len(kernel_size)
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.split_input = split_input
+        if self.split_input:
+            assert in_channels % self.num_paths == 0
+            in_channels = in_channels // self.num_paths
+        groups = min(out_channels, groups)
+        self.split = Split(split_size_or_sections=self.in_channels // self.num_paths, output_num=self.num_paths, axis=1)
+
+        self.paths = nn.CellList([
+            Conv2dNormActivation(in_channels, out_channels, kernel_size=k, stride=stride, groups=groups,
+                                 dilation=d, activation=activation, norm=norm)
+            for k, d in zip(kernel_size, dilation)
+        ])
+
+        attn_channels = rd_channels or make_divisible(out_channels * rd_ratio, divisor=rd_divisor)
+        self.attn = SelectiveKernelAttn(out_channels, self.num_paths, attn_channels)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_paths = []
+        if self.split_input:
+            x_split = self.split(x)
+            for i, op in enumerate(self.paths):
+                x_paths.append(op(x_split[i]))
+        else:
+            for op in self.paths:
+                x_paths.append(op(x))
+
+        x = ops.stack(x_paths, axis=1)
+        x_attn = self.attn(x)
+        x = x * x_attn
+        x = x.sum(1)
+        return x
+
+
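A minimal sketch of the module above with its defaults (channel count and spatial size are illustrative): two 3x3 branches (the 5x5 branch becomes a dilated 3x3), input split across branches, and the output re-weighted by the attention block.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.selective_kernel import SelectiveKernel

sk = SelectiveKernel(in_channels=32)  # out_channels defaults to in_channels
x = ms.Tensor(np.random.randn(1, 32, 28, 28), ms.float32)
# Expected (1, 32, 28, 28) with the default "same"-style branch padding:
# an attention-weighted sum over the two branch outputs.
print(sk(x).shape)
```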
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Squeeze and Excite

+ + +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExcite + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in original SE-Nets with a few additions.

+ +
+ Additions include
+  • divisor can be specified to keep channels % div == 0 (default: 8)
+  • reduction channels can be specified directly by arg (if rd_channels is set)
+  • reduction channels can be specified by float rd_ratio (default: 1/16)
+  • customizable activation, normalization, and gate layer
+
+
+ Source code in mindcv/models/layers/squeeze_excite.py +
14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
class SqueezeExcite(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    Additions include:
+        * divisor can be specified to keep channels % div == 0 (default: 8)
+        * reduction channels can be specified directly by arg (if rd_channels is set)
+        * reduction channels can be specified by float rd_ratio (default: 1/16)
+        * customizable activation, normalization, and gate layer
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Conv2d(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            kernel_size=1,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=True)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x = x * x_se
+        return x
+
+
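A short sketch of the module above (channel count illustrative): the block squeezes spatial information with global pooling, computes per-channel gates, and rescales the input.

```python
import numpy as np
import mindspore as ms
from mindcv.models.layers.squeeze_excite import SqueezeExcite

se = SqueezeExcite(in_channels=64, rd_ratio=1.0 / 16)  # reduction width rounded by rd_divisor
x = ms.Tensor(np.random.randn(1, 64, 14, 14), ms.float32)
print(se(x).shape)  # (1, 64, 14, 14) -- same shape, channels rescaled by learned gates
```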
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + +

+ mindcv.models.layers.squeeze_excite.SqueezeExciteV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeExcite Module as defined in original SE-Nets with a few additions. +V1 replaces the fully-connected layers with 1x1 convolutions, while V2 implements them directly with nn.Dense.

+ +
+ Source code in mindcv/models/layers/squeeze_excite.py +
 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
class SqueezeExciteV2(nn.Cell):
+    """SqueezeExcite Module as defined in original SE-Nets with a few additions.
+    V1 replaces the fully-connected layers with 1x1 convolutions, while V2 implements them directly with nn.Dense.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        rd_ratio: float = 1.0 / 16,
+        rd_channels: Optional[int] = None,
+        rd_divisor: int = 8,
+        norm: Optional[nn.Cell] = None,
+        act_layer: nn.Cell = nn.ReLU,
+        gate_layer: nn.Cell = nn.Sigmoid,
+    ) -> None:
+        super().__init__()
+        self.norm = norm
+        self.act = act_layer()
+        self.gate = gate_layer()
+        if not rd_channels:
+            rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor)
+
+        self.conv_reduce = nn.Dense(
+            in_channels=in_channels,
+            out_channels=rd_channels,
+            has_bias=True,
+        )
+        if self.norm:
+            self.bn = nn.BatchNorm2d(rd_channels)
+        self.conv_expand = nn.Dense(
+            in_channels=rd_channels,
+            out_channels=in_channels,
+            has_bias=True,
+        )
+        self.pool = GlobalAvgPooling(keep_dims=False)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x_se = self.pool(x)
+        x_se = self.conv_reduce(x_se)
+        if self.norm:
+            x_se = self.bn(x_se)
+        x_se = self.act(x_se)
+        x_se = self.conv_expand(x_se)
+        x_se = self.gate(x_se)
+        x_se = ops.expand_dims(x_se, -1)
+        x_se = ops.expand_dims(x_se, -1)
+        x = x * x_se
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/models/index.html b/zh/reference/models/index.html new file mode 100644 index 000000000..afb37949f --- /dev/null +++ b/zh/reference/models/index.html @@ -0,0 +1,40359 @@ + + + + + + + + + + + + + + + + + + + + + + + + models - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Models

+

Create Model

+ + + +
+ + + +

+mindcv.models.model_factory.create_model(model_name, num_classes=1000, pretrained=False, in_channels=3, checkpoint_path='', ema=False, auto_mapping=False, **kwargs) + +

+ + +
+ +

Creates model by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
model_name +
+

The name of model.

+
+

+ + TYPE: + str + +

+
num_classes +
+

The number of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
pretrained +
+

Whether to load the pretrained model. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
checkpoint_path +
+

The path of checkpoint files. Default: "".

+
+

+ + TYPE: + str + + + DEFAULT: + '' + +

+
ema +
+

Whether use ema method. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
auto_mapping +
+

Whether to automatically map the names of checkpoint weights +to the names of model weights when there are differences in names. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/model_factory.py +
 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
def create_model(
+    model_name: str,
+    num_classes: int = 1000,
+    pretrained: bool = False,
+    in_channels: int = 3,
+    checkpoint_path: str = "",
+    ema: bool = False,
+    auto_mapping: bool = False,
+    **kwargs,
+):
+    r"""Creates model by name.
+
+    Args:
+        model_name (str):  The name of model.
+        num_classes (int): The number of classes. Default: 1000.
+        pretrained (bool): Whether to load the pretrained model. Default: False.
+        in_channels (int): The input channels. Default: 3.
+        checkpoint_path (str): The path of checkpoint files. Default: "".
+        ema (bool): Whether use ema method. Default: False.
+        auto_mapping (bool): Whether to automatically map the names of checkpoint weights
+            to the names of model weights when there are differences in names. Default: False.
+    """
+
+    if checkpoint_path != "" and pretrained:
+        raise ValueError("checkpoint_path is mutually exclusive with pretrained")
+
+    model_args = dict(num_classes=num_classes, pretrained=pretrained, in_channels=in_channels)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+    if not is_model(model_name):
+        raise RuntimeError(f"Unknown model {model_name}")
+
+    create_fn = model_entrypoint(model_name)
+    model = create_fn(**model_args, **kwargs)
+
+    if checkpoint_path:
+        load_model_checkpoint(model, checkpoint_path, ema, auto_mapping)
+
+    return model
+
+
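A minimal sketch of the factory above. "BiT_resnet50" is one of the registered names documented on this page; `num_classes` is illustrative.

```python
from mindcv.models.model_factory import create_model

model = create_model("BiT_resnet50", num_classes=10, pretrained=False)
print(type(model).__name__)                            # BiT_ResNet
print(sum(p.size for p in model.trainable_params()))   # rough parameter count
```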
+
+ +

bit

+ + +
+ + + +

+ mindcv.models.bit.BiT_ResNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

BiT_ResNet model class, based on +"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block(Union[Bottleneck]) +
+

block of BiT_ResNetv2.

+
+

+

+
layers(tuple(int)) +
+

number of layers of each stage.

+
+

+

+
wf(int) +
+

width of each layer. Default: 1.

+
+

+

+
num_classes(int) +
+

number of classification classes. Default: 1000.

+
+

+

+
in_channels(int) +
+

number of input channels. Default: 3.

+
+

+

+
groups(int) +
+

number of groups for group conv in blocks. Default: 1.

+
+

+

+
base_width(int) +
+

base width of the per-group hidden channels in blocks. Default: 64.

+
+

+

+
norm(nn.Cell) +
+

normalization layer in blocks. Default: None.

+
+

+

+
+ +
+ Source code in mindcv/models/bit.py +
149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
class BiT_ResNet(nn.Cell):
+    r"""BiT_ResNet model class, based on
+    `"Big Transfer (BiT): General Visual Representation Learning" <https://arxiv.org/abs/1912.11370>`_
+    Args:
+        block(Union[Bottleneck]): block of BiT_ResNetv2.
+        layers(tuple(int)): number of layers of each stage.
+        wf(int): width of each layer. Default: 1.
+        num_classes(int): number of classification classes. Default: 1000.
+        in_channels(int): number of input channels. Default: 3.
+        groups(int): number of groups for group conv in blocks. Default: 1.
+        base_width(int): base width of the per-group hidden channels in blocks. Default: 64.
+        norm(nn.Cell): normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[Bottleneck]],
+        layers: List[int],
+        wf: int = 1,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+
+        if norm is None:
+            norm = nn.GroupNorm
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64 * wf
+        self.groups = groups
+        self.base_with = base_width
+
+        self.conv1 = StdConv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.pad = nn.ConstantPad2d(1, 0)
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid")
+
+        self.layer1 = self._make_layer(block, 64 * wf, layers[0])
+        self.layer2 = self._make_layer(block, 128 * wf, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256 * wf, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512 * wf, layers[3], stride=2)
+
+        self.gn = norm(32, 2048 * wf)
+        self.relu = nn.ReLU()
+        self.pool = GlobalAvgPooling(keep_dims=True)
+        self.classifier = nn.Conv2d(512 * block.expansion * wf, num_classes, kernel_size=1, has_bias=True)
+
+    def _make_layer(
+        self,
+        block: Type[Union[Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                StdConv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def root(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.pad(x)
+        x = self.max_pool(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.gn(x)
+        x = self.relu(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.root(x)
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        assert x.shape[-2:] == (1, 1)  # We should have no spatial shape left.
+        return x[..., 0, 0]
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.bit.BiT_ResNet.forward_features(x) + +

+ + +
+ +

Network forward feature extraction.

+ +
+ Source code in mindcv/models/bit.py +
247
+248
+249
+250
+251
+252
+253
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 101-layer BiT ResNet model. +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
@register_model
+def BiT_resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNet model.
+    Refer to the base class `models.BiT_Resnet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet101"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model. +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
@register_model
+def BiT_resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNet model.
+    Refer to the base class `models.BiT_Resnet` for more details.
+    """
+    default_cfg = default_cfgs["BiT_resnet50"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
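An inference-shaped sketch for the factory above (input size and batch are illustrative): the classifier is a 1x1 convolution, and the final squeeze gives logits of shape (N, num_classes).

```python
import numpy as np
import mindspore as ms
from mindcv.models.bit import BiT_resnet50

net = BiT_resnet50(pretrained=False, num_classes=1000)
net.set_train(False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)
```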
+
+ +
+ + +
+ + + +

+mindcv.models.bit.BiT_resnet50x3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get a 50-layer BiT ResNet model with width multiplier 3 (wf=3). +Refer to the base class models.BiT_ResNet for more details.

+ +
+ Source code in mindcv/models/bit.py +
284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
@register_model
+def BiT_resnet50x3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNet model.
+     Refer to the base class `models.BiT_Resnet` for more details.
+     """
+    default_cfg = default_cfgs["BiT_resnet50x3"]
+    model = BiT_ResNet(Bottleneck, [3, 4, 6, 3], wf=3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

cait

+ + +
+ + + +

+ mindcv.models.cait.CaiT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cait.py +
241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
class CaiT(nn.Cell):
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_channels: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: float = 4.,
+                 qkv_bias: bool = False,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = nn.LayerNorm,
+                 act_layer: nn.Cell = nn.GELU,
+                 init_values: float = 1e-4,
+                 depth_token_only: int = 2,
+                 mlp_ratio_clstk: float = 4.0) -> None:
+        super(CaiT, self).__init__()
+        self.num_classes = num_classes
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(image_size=img_size,
+                                      patch_size=patch_size,
+                                      in_chans=in_channels,
+                                      embed_dim=embed_dim)
+
+        num_patches = self.patch_embed.num_patches
+
+        zeros = ops.Zeros()
+        self.cls_token = Parameter(zeros((1, 1, embed_dim), ms.float32))
+        self.pos_embed = Parameter(zeros((1, num_patches, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+
+        self.blocks = []
+        self.blocks_token_only = []
+
+        self.blocks = nn.CellList([
+            LayerScaleBlockSA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=drop_rate,
+                attn_drop_rate=attn_drop_rate,
+                drop_path_rate=dpr[i],
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth)])
+        self.blocks_token_only = nn.CellList([
+            LayerScaleBlockCA(
+                dim=embed_dim,
+                num_heads=num_heads,
+                mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop_rate=0.0,
+                attn_drop_rate=0.0,
+                drop_path_rate=0.0,
+                act_layer=act_layer,
+                norm_layer=norm_layer,
+                init_values=init_values)
+            for i in range(depth_token_only)])
+
+        self.norm = norm_layer((embed_dim,))
+
+        self.head = nn.Dense(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
+
+        self.pos_embed = init.initializer(TruncatedNormal(sigma=0.02), self.pos_embed.shape, ms.float32)
+        self.cls_token = init.initializer(TruncatedNormal(sigma=0.02), self.cls_token.shape, ms.float32)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = init.initializer(TruncatedNormal(sigma=0.02), m.weight.shape, ms.float32)
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        B = x.shape[0]
+        x = self.patch_embed(x)
+
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+
+        for i , blk in enumerate(self.blocks):
+            x = blk(x)
+        for i , blk in enumerate(self.blocks_token_only):
+            cls_tokens = blk(x, cls_tokens)
+
+        x = ops.concat((cls_tokens, x), axis=1)
+
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
419
+420
+421
+422
+423
+424
+425
+426
+427
+428
@register_model
+def cait_m36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=36, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_m48_448(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
431
+432
+433
+434
+435
+436
+437
+438
+439
+440
@register_model
+def cait_m48_448(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=448, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=768, depth=48, num_heads=16, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
383
+384
+385
+386
+387
+388
+389
+390
+391
+392
@register_model
+def cait_s24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
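A minimal sketch for the registered variant above (batch size illustrative): cait_s24_224 is built for 224x224 inputs (img_size=224, patch_size=16 in the listing).

```python
import numpy as np
import mindspore as ms
from mindcv.models.cait import cait_s24_224

net = cait_s24_224(pretrained=False, num_classes=1000)
net.set_train(False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(net(x).shape)  # (1, 1000)
```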
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
395
+396
+397
+398
+399
+400
+401
+402
+403
+404
@register_model
+def cait_s24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=24, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_s36_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
407
+408
+409
+410
+411
+412
+413
+414
+415
+416
@register_model
+def cait_s36_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=384, depth=36, num_heads=8, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-6, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xs24_384(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
371
+372
+373
+374
+375
+376
+377
+378
+379
+380
@register_model
+def cait_xs24_384(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=384, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=288, depth=24, num_heads=6, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.cait.cait_xxs24_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/cait.py +
359
+360
+361
+362
+363
+364
+365
+366
+367
+368
@register_model
+def cait_xxs24_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> CaiT:
+    model = CaiT(img_size=224, patch_size=16, in_channels=in_channels, num_classes=num_classes,
+                 embed_dim=192, depth=24, num_heads=4, mlp_ratio=4, qkv_bias=False,
+                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6), init_values=1e-5, depth_token_only=2,
+                 **kwargs)
+
+    if pretrained:
+        load_pretrained(model, _cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +

cmt

+ + +
+ + + +

+ mindcv.models.cmt.CMT + + +

+ + +
+

+ Bases: nn.Cell

+ + +
+ Source code in mindcv/models/cmt.py +
205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
class CMT(nn.Cell):
+    def __init__(
+        self,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        embed_dims=None,
+        stem_channel=16,
+        fc_dim=1280,
+        num_heads=None,
+        mlp_ratios=None,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=None,
+        depths=None,
+        qk_ratio=1,
+        sr_ratios=None,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dims[-1]
+        norm_layer = norm_layer or nn.LayerNorm
+
+        self.stem_conv1 = nn.Conv2d(
+            3, stem_channel, kernel_size=3, stride=2, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu1 = nn.GELU()
+        self.stem_norm1 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv2 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu2 = nn.GELU()
+        self.stem_norm2 = nn.BatchNorm2d(stem_channel)
+
+        self.stem_conv3 = nn.Conv2d(
+            stem_channel, stem_channel, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=True)
+        self.stem_relu3 = nn.GELU()
+        self.stem_norm3 = nn.BatchNorm2d(stem_channel)
+
+        self.patch_embed_a = PatchEmbed(
+            img_size=img_size // 2, patch_size=2, in_chans=stem_channel, embed_dim=embed_dims[0])
+        self.patch_embed_b = PatchEmbed(
+            img_size=img_size // 4, patch_size=2, in_chans=embed_dims[0], embed_dim=embed_dims[1])
+        self.patch_embed_c = PatchEmbed(
+            img_size=img_size // 8, patch_size=2, in_chans=embed_dims[1], embed_dim=embed_dims[2])
+        self.patch_embed_d = PatchEmbed(
+            img_size=img_size // 16, patch_size=2, in_chans=embed_dims[2], embed_dim=embed_dims[3])
+
+        self.relative_pos_a = ops.zeros(
+            (num_heads[0], self.patch_embed_a.num_patches,
+             self.patch_embed_a.num_patches // sr_ratios[0] // sr_ratios[0]),
+            mindspore.float32)
+        self.relative_pos_b = ops.zeros(
+            (num_heads[1], self.patch_embed_b.num_patches,
+             self.patch_embed_b.num_patches // sr_ratios[1] // sr_ratios[1]),
+            mindspore.float32)
+        self.relative_pos_c = ops.zeros(
+            (num_heads[2], self.patch_embed_c.num_patches,
+             self.patch_embed_c.num_patches // sr_ratios[2] // sr_ratios[2]),
+            mindspore.float32)
+        self.relative_pos_d = ops.zeros(
+            (num_heads[3], self.patch_embed_d.num_patches,
+             self.patch_embed_d.num_patches // sr_ratios[3] // sr_ratios[3]),
+            mindspore.float32)
+
+        # stochastic depth decay rule
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+        self.blocks_a = nn.CellList([
+            Block(
+                dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        cur += depths[0]
+        self.blocks_b = nn.CellList([
+            Block(
+                dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        cur += depths[1]
+        self.blocks_c = nn.CellList([
+            Block(
+                dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        cur += depths[2]
+        self.blocks_d = nn.CellList([
+            Block(
+                dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[
+                    cur + i],
+                norm_layer=norm_layer, qk_ratio=qk_ratio, sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+
+        # Classifier head
+        self._fc = nn.Conv2d(
+            embed_dims[-1], fc_dim, kernel_size=1, has_bias=True)
+        self._bn = nn.BatchNorm2d(fc_dim)
+        self._drop = Dropout(p=drop_rate)
+        self.head = nn.Dense(
+            fc_dim, num_classes) if num_classes > 0 else ops.Identity()
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape,
+                                                      cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+            elif isinstance(cell, (nn.LayerNorm, nn.BatchNorm2d)):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+
+    def forward_features(self, x):
+        B = x.shape[0]
+        x = self.stem_conv1(x)
+        x = self.stem_relu1(x)
+        x = self.stem_norm1(x)
+
+        x = self.stem_conv2(x)
+        x = self.stem_relu2(x)
+        x = self.stem_norm2(x)
+
+        x = self.stem_conv3(x)
+        x = self.stem_relu3(x)
+        x = self.stem_norm3(x)
+
+        x, (H, W) = self.patch_embed_a(x)
+        for _, blk in enumerate(self.blocks_a):
+            x = blk(x, H, W, self.relative_pos_a)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_b(x)
+        for _, blk in enumerate(self.blocks_b):
+            x = blk(x, H, W, self.relative_pos_b)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_c(x)
+        for _, blk in enumerate(self.blocks_c):
+            x = blk(x, H, W, self.relative_pos_c)
+
+        x = ops.transpose(x.reshape(B, H, W, -1), (0, 3, 1, 2))
+        x, (H, W) = self.patch_embed_d(x)
+        for _, blk in enumerate(self.blocks_d):
+            x = blk(x, H, W, self.relative_pos_d)
+
+        B, _, C = x.shape
+
+        x = self._fc(ops.transpose(x, (0, 2, 1)).reshape(B, C, H, W))
+        x = self._bn(x)
+        x = swish(x)
+        x = GlobalAvgPooling()(x)
+        x = self._drop(x)
+        return x
+
+    def forward_head(self, x):
+        x = self.head(x)
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+mindcv.models.cmt.cmt_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+CMT-Base
+
+Source code in mindcv/models/cmt.py
@register_model
+def cmt_base(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Base
+    """
+    default_cfg = default_cfgs["cmt_base"]
+
+    model = CMT(img_size=256, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[76, 152, 304, 608], stem_channel=38, num_heads=[1, 2, 4, 8], depths=[4, 4, 20, 4],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
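+A minimal usage sketch for the factory above; the 256x256 input follows the img_size=256 setting in the listed source, and the dummy batch is illustrative only:
+
+import mindspore as ms
+import numpy as np
+from mindcv.models.cmt import cmt_base
+
+# build CMT-Base without pretrained weights
+model = cmt_base(pretrained=False, num_classes=1000)
+# cmt_base is configured for 256x256 inputs (img_size=256 above)
+x = ms.Tensor(np.random.randn(1, 3, 256, 256), ms.float32)
+logits = model(x)  # construct() -> forward_features() -> forward_head()
+print(logits.shape)  # (1, 1000)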
+
+mindcv.models.cmt.cmt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+CMT-Small
+
+Source code in mindcv/models/cmt.py
@register_model
+def cmt_small(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-Small
+    """
+    default_cfg = default_cfgs["cmt_small"]
+
+    model = CMT(img_size=224, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[64, 128, 256, 512], stem_channel=32, num_heads=[1, 2, 4, 8], depths=[3, 3, 16, 3],
+                mlp_ratios=[4, 4, 4, 4], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.cmt.cmt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+CMT-tiny
+
+Source code in mindcv/models/cmt.py
@register_model
+def cmt_tiny(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-tiny
+    """
+    default_cfg = default_cfgs["cmt_tiny"]
+
+    model = CMT(img_size=160, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[46, 92, 184, 368], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[2, 2, 10, 2],
+                mlp_ratios=[3.6, 3.6, 3.6, 3.6], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.cmt.cmt_xsmall(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+CMT-XSmall
+
+Source code in mindcv/models/cmt.py
@register_model
+def cmt_xsmall(pretrained=False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """
+    CMT-XSmall
+    """
+    default_cfg = default_cfgs["cmt_xsmall"]
+
+    model = CMT(img_size=192, num_classes=num_classes, in_channels=in_channels, qkv_bias=True,
+                embed_dims=[52, 104, 208, 416], stem_channel=16, num_heads=[1, 2, 4, 8], depths=[3, 3, 12, 3],
+                mlp_ratios=[3.8, 3.8, 3.8, 3.8], qk_ratio=1, sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+coat
+
+mindcv.models.coat.coat_lite_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_lite_medium(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_medium']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[128, 256, 320, 512],
+                 serial_depths=[3, 6, 10, 8], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
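+
+A minimal usage sketch for the CoaT-Lite factories; the 224x224 input size is an assumption for illustration and is not fixed by the listed source:
+
+import mindspore as ms
+import numpy as np
+from mindcv.models.coat import coat_lite_medium
+
+# CoaT-Lite variants use serial blocks only (parallel_depth=0 in the config above)
+model = coat_lite_medium(pretrained=False, num_classes=1000)
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)  # dummy NCHW batch
+logits = model(x)
+print(logits.shape)  # expected: (1, 1000)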
+
+mindcv.models.coat.coat_lite_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_lite_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.coat.coat_lite_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_lite_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 320, 512],
+                 serial_depths=[3, 4, 6, 3], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.coat.coat_lite_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_lite_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_lite_tiny']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[64, 128, 256, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=0,
+                 num_heads=8, mlp_ratios=[8, 8, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.coat.coat_mini(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_mini(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_mini']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 216, 216, 216],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.coat.coat_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_small']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 320, 320, 320],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.coat.coat_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/coat.py
@register_model
+def coat_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs['coat_tiny']
+    model = CoaT(in_channels=in_channels, num_classes=num_classes,
+                 patch_size=4, embed_dims=[152, 152, 152, 152],
+                 serial_depths=[2, 2, 2, 2], parallel_depth=6,
+                 num_heads=8, mlp_ratios=[4, 4, 4, 4], **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+convit
+
+mindcv.models.convit.ConViT
+
+Bases: nn.Cell
+
+ConViT model class, based on
+"Improving Vision Transformers with Soft Convolutional Inductive Biases",
+https://arxiv.org/pdf/2103.10697.pdf
+PARAMETER           TYPE    DEFAULT   DESCRIPTION
+in_channels         int     3         number of channels of the input. Default: 3.
+num_classes         int     1000      number of classification classes. Default: 1000.
+image_size          int     224       input image size. Default: 224.
+patch_size          int     16        image patch size. Default: 16.
+embed_dim           int     48        embedding dimension in all heads. Default: 48.
+num_heads           int     12        number of heads. Default: 12.
+drop_rate           float   0.0       dropout rate. Default: 0.
+drop_path_rate      float   0.1       drop path rate. Default: 0.1.
+depth               int     12        model block depth. Default: 12.
+mlp_ratio           float   4.0       ratio of hidden features in Mlp. Default: 4.
+qkv_bias            bool    False     whether the qkv layers have bias. Default: False.
+attn_drop_rate      float   0.0       attention layers dropout rate. Default: 0.
+locality_strength   float   1.0       determines how focused each head is around its attention center. Default: 1.
+local_up_to_layer   int     10        number of GPSA layers. Default: 10.
+use_pos_embed       bool    True      whether to use position embedding. Default: True.
+
+Source code in mindcv/models/convit.py
class ConViT(nn.Cell):
+    r"""ConViT model class, based on
+    '"Improving Vision Transformers with Soft Convolutional Inductive Biases"
+    <https://arxiv.org/pdf/2103.10697.pdf>'
+
+    Args:
+        in_channels (int): number the channels of the input. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        image_size (int) : images input size. Default: 224.
+        patch_size (int) : image patch size. Default: 16.
+        embed_dim (int) : embedding dimension in all head. Default: 48.
+        num_heads (int) : number of heads. Default: 12.
+        drop_rate (float) : dropout rate. Default: 0.
+        drop_path_rate (float) : drop path rate. Default: 0.1.
+        depth (int) : model block depth. Default: 12.
+        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.
+        qkv_bias (bool) : have bias in qkv layers or not. Default: False.
+        attn_drop_rate (float) : attention layers dropout rate. Default: 0.
+        locality_strength (float) : determines how focused each head is around its attention center. Default: 1.
+        local_up_to_layer (int) : number of GPSA layers. Default: 10.
+        use_pos_embed (bool): whether use the embeded position.  Default: True.
+        locality_strength(float): the strength of locality. Default: 1.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        image_size: int = 224,
+        patch_size: int = 16,
+        embed_dim: int = 48,
+        num_heads: int = 12,
+        drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        depth: int = 12,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        attn_drop_rate: float = 0.0,
+        local_up_to_layer: int = 10,
+        use_pos_embed: bool = True,
+        locality_strength: float = 1.0,
+    ) -> None:
+        super().__init__()
+
+        self.local_up_to_layer = local_up_to_layer
+        self.use_pos_embed = use_pos_embed
+        self.num_heads = num_heads
+        self.locality_strength = locality_strength
+        self.embed_dim = embed_dim
+
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim)
+        self.num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(ops.Zeros()((1, 1, embed_dim), ms.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        if self.use_pos_embed:
+            self.pos_embed = Parameter(ops.Zeros()((1, self.num_patches, embed_dim), ms.float32))
+            self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.pos_embed.data.shape))
+
+        dpr = [x.item() for x in np.linspace(0, drop_path_rate, depth)]
+        self.blocks = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=True)
+            if i < local_up_to_layer else
+            Block(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                use_gpsa=False)
+            for i in range(depth)])
+        self.norm = nn.LayerNorm((embed_dim,))
+
+        self.classifier = nn.Dense(in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else Identity()
+        self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), self.cls_token.data.shape))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.data.shape))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Constant(0), cell.bias.shape))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.Constant(1), cell.gamma.shape))
+                cell.beta.set_data(init.initializer(init.Constant(0), cell.beta.shape))
+        # local init
+        for i in range(self.local_up_to_layer):
+            self.blocks[i].attn.v.weight.set_data(ops.eye(self.embed_dim, self.embed_dim, ms.float32), slice_shape=True)
+            locality_distance = 1
+            kernel_size = int(self.num_heads**0.5)
+            center = (kernel_size - 1) / 2 if kernel_size % 2 == 0 else kernel_size // 2
+            pos_weight_data = self.blocks[i].attn.pos_proj.weight.data
+            for h1 in range(kernel_size):
+                for h2 in range(kernel_size):
+                    position = h1 + kernel_size * h2
+                    pos_weight_data[position, 2] = -1
+                    pos_weight_data[position, 1] = 2 * (h1 - center) * locality_distance
+                    pos_weight_data[position, 0] = 2 * (h2 - center) * locality_distance
+            pos_weight_data = pos_weight_data * self.locality_strength
+            self.blocks[i].attn.pos_proj.weight.set_data(pos_weight_data)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.use_pos_embed:
+            x = x + self.pos_embed
+        x = self.pos_drop(x)
+        cls_tokens = ops.tile(self.cls_token, (x.shape[0], 1, 1))
+        for u, blk in enumerate(self.blocks):
+            if u == self.local_up_to_layer:
+                x = ops.Cast()(x, cls_tokens.dtype)
+                x = ops.concat((cls_tokens, x), 1)
+            x = blk(x)
+        x = self.norm(x)
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
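+A minimal construction sketch for the class above, using the convit_tiny-like configuration (embed_dim=192, num_heads=4) for illustration:
+
+import mindspore as ms
+import numpy as np
+from mindcv.models.convit import ConViT
+
+# the first `local_up_to_layer` blocks use gated positional self-attention (GPSA);
+# the class token is concatenated just before the remaining vanilla attention blocks
+model = ConViT(in_channels=3, num_classes=1000, embed_dim=192, num_heads=4,
+               depth=12, local_up_to_layer=10, use_pos_embed=True)
+x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
+logits = model(x)
+print(logits.shape)  # (2, 1000)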
+
+mindcv.models.convit.convit_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT base model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=768, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.convit.convit_base_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT base+ model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_base_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT base+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_base_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=16, embed_dim=1024, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.convit.convit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT small model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=432, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.convit.convit_small_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT small+ model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_small_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT small+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_small_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=9, embed_dim=576, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.convit.convit_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT tiny model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=192, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.convit.convit_tiny_plus(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConViT tiny+ model.
+Refer to the base class "models.ConViT" for more details.
+
+Source code in mindcv/models/convit.py
@register_model
+def convit_tiny_plus(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> ConViT:
+    """Get ConViT tiny+ model
+    Refer to the base class "models.ConViT" for more details.
+    """
+    default_cfg = default_cfgs["convit_tiny_plus"]
+    model = ConViT(in_channels=in_channels, num_classes=num_classes,
+                   num_heads=4, embed_dim=256, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+convnext
+
+mindcv.models.convnext.ConvNeXt
+
+Bases: nn.Cell
+
+ConvNeXt and ConvNeXt V2 model class, based on
+"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545> and
+"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>
+PARAMETER                TYPE        DEFAULT    DESCRIPTION
+in_channels              int         required   number of input channels.
+num_classes              int         required   number of classes predicted.
+depths                   List[int]   required   number of blocks in each stage.
+dims                     List[int]   required   channel dimension of each stage.
+drop_path_rate           float       0.0        drop path rate. Default: 0.0.
+layer_scale_init_value   float       1e-06      initial value of the per-block layer scale. Default: 1e-6.
+head_init_scale          float       1.0        scaling factor applied to the classifier head weights and bias at init. Default: 1.0.
+use_grn                  bool        False      if True, use Global Response Normalization in each block. Default: False.
+
+Source code in mindcv/models/convnext.py
class ConvNeXt(nn.Cell):
+    r"""ConvNeXt and ConvNeXt V2 model class, based on
+    `"A ConvNet for the 2020s" <https://arxiv.org/abs/2201.03545>`_ and
+    `"ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders" <https://arxiv.org/abs/2301.00808>`_
+
+    Args:
+        in_channels: dim of the input channel.
+        num_classes: dim of the classes predicted.
+        depths: the depths of each layer.
+        dims: the middle dim of each layer.
+        drop_path_rate: the rate of droppath. Default: 0.0.
+        layer_scale_init_value: the parameter of init for the classifier. Default: 1e-6.
+        head_init_scale: the parameter of init for the head. Default: 1.0.
+        use_grn: If True, use Global Response Normalization in each block. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        num_classes: int,
+        depths: List[int],
+        dims: List[int],
+        drop_path_rate: float = 0.0,
+        layer_scale_init_value: float = 1e-6,
+        head_init_scale: float = 1.0,
+        use_grn: bool = False,
+    ):
+        super().__init__()
+
+        downsample_layers = []  # stem and 3 intermediate down_sampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4, has_bias=True),
+            ConvNextLayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                ConvNextLayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            downsample_layers.append(downsample_layer)
+
+        total_reduction = 4
+        self.feature_info = []
+        self.flatten_sequential = True
+
+        stages = []  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            blocks = []
+            for j in range(depths[i]):
+                blocks.append(Block(dim=dims[i], drop_path=dp_rates[cur + j],
+                                    layer_scale_init_value=layer_scale_init_value, use_grn=use_grn))
+            stage = nn.SequentialCell(blocks)
+            stages.append(stage)
+            cur += depths[i]
+
+            if i > 0:
+                total_reduction *= 2
+            self.feature_info.append(dict(chs=dims[i], reduction=total_reduction, name=f'feature.{i * 2 + 1}'))
+
+        self.feature = nn.SequentialCell([
+            downsample_layers[0],
+            stages[0],
+            downsample_layers[1],
+            stages[1],
+            downsample_layers[2],
+            stages[2],
+            downsample_layers[3],
+            stages[3]
+        ])
+        self.norm = ConvNextLayerNorm((dims[-1],), epsilon=1e-6)  # final norm layer
+        self.classifier = nn.Dense(dims[-1], num_classes)  # classifier
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+        self.classifier.weight.set_data(self.classifier.weight * self.head_init_scale)
+        self.classifier.bias.set_data(self.classifier.bias * self.head_init_scale)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.feature(x)
+        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
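+A minimal construction sketch for the class above; in_channels, num_classes, depths and dims are required, and the depths/dims here copy the tiny configuration used by convnext_tiny below:
+
+from mindcv.models.convnext import ConvNeXt
+
+model = ConvNeXt(in_channels=3, num_classes=1000, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768])
+# per-stage channel counts and total strides recorded during construction
+for info in model.feature_info:
+    print(info)  # e.g. {'chs': 96, 'reduction': 4, 'name': 'feature.1'}, ...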
+
+mindcv.models.convnext.convnext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt base model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnext_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_base"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnext_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt large model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnext_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_large"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt small model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnext_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt small model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_small"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnext_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt tiny model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnext_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_tiny"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
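+A minimal usage sketch for the factory above (the 224x224 input is illustrative); forward_features returns the globally pooled (N, C) embedding described in the class source:
+
+import mindspore as ms
+import numpy as np
+from mindcv.models.convnext import convnext_tiny
+
+model = convnext_tiny(pretrained=False)
+x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
+feats = model.forward_features(x)   # pooled features, shape (1, 768) for the tiny dims
+logits = model.forward_head(feats)  # classifier on top of the pooled features
+print(feats.shape, logits.shape)    # (1, 768) (1, 1000)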
+
+mindcv.models.convnext.convnext_xlarge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt xlarge model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnext_xlarge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt xlarge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnext_xlarge"]
+    model_args = dict(
+        in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs
+    )
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_atto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 atto model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_atto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 atto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_atto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[40, 80, 160, 320], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
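+A minimal sketch for the V2 factories; the only configuration change relative to the V1 factories visible in this listing is use_grn=True with layer_scale_init_value=0.0, and the parameter-count print is illustrative:
+
+from mindcv.models.convnext import convnextv2_atto
+
+model = convnextv2_atto(pretrained=False)
+# count trainable parameters of the ConvNeXt V2 atto configuration
+n_params = sum(p.size for p in model.trainable_params())
+print(f"convnextv2_atto trainable parameters: {n_params / 1e6:.1f}M")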
+
+mindcv.models.convnext.convnextv2_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 base model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_base(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 base model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_base"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[128, 256, 512, 1024], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_femto(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 femto model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_femto(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 femto model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_femto"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[48, 96, 192, 384], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_huge(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 huge model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_huge(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 huge model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_huge"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[352, 704, 1408, 2816], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 large model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_large(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 large model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_large"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 27, 3],
+                      dims=[192, 384, 768, 1536], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_nano(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 nano model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_nano(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 nano model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_nano"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 8, 2],
+                      dims=[80, 160, 320, 640], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_pico(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 pico model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_pico(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 pico model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_pico"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[2, 2, 6, 2],
+                      dims=[64, 128, 256, 512], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+mindcv.models.convnext.convnextv2_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get ConvNeXt_v2 tiny model.
+Refer to the base class 'models.ConvNeXt' for more details.
+
+Source code in mindcv/models/convnext.py
@register_model
+def convnextv2_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ConvNeXt:
+    """Get ConvNeXt_v2 tiny model.
+    Refer to the base class 'models.ConvNeXt' for more details.
+    """
+    default_cfg = default_cfgs["convnextv2_tiny"]
+    model_args = dict(in_channels=in_channels, num_classes=num_classes, depths=[3, 3, 9, 3],
+                      dims=[96, 192, 384, 768], use_grn=True, layer_scale_init_value=0.0, **kwargs)
+    return _create_convnext(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+crossvit
+
+mindcv.models.crossvit.crossvit_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/crossvit.py
@register_model
+def crossvit_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[192, 384], depth=[[1, 5, 0], [1, 5, 0], [1, 5, 0]],
+                              num_heads=[6, 6], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_15"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
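+A minimal construction sketch for the CrossViT factories above; the token counts in the comments read the img_size/patch_size/embed_dim lists position by position, which is an assumed pairing for illustration:
+
+from mindcv.models.crossvit import crossvit_15
+
+# the two branches patchify at different granularities:
+#   branch 0: 240 / 12 -> 20x20 = 400 patch tokens (+1 cls token), embed_dim 192
+#   branch 1: 224 / 16 -> 14x14 = 196 patch tokens (+1 cls token), embed_dim 384
+model = crossvit_15(pretrained=False, num_classes=1000)
+print(type(model).__name__)  # VisionTransformer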
+
+mindcv.models.crossvit.crossvit_18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/crossvit.py
@register_model
+def crossvit_18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[224, 448], depth=[[1, 6, 0], [1, 6, 0], [1, 6, 0]],
+                              num_heads=[7, 7], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_18"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+mindcv.models.crossvit.crossvit_9(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Source code in mindcv/models/crossvit.py
@register_model
+def crossvit_9(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[128, 256], depth=[[1, 3, 0], [1, 3, 0], [1, 3, 0]],
+                              num_heads=[4, 4], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_9"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+densenet
+
+mindcv.models.densenet.DenseNet
+
+Bases: nn.Cell
+
+Densenet-BC model class, based on
+"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>
+PARAMETER           TYPE                        DEFAULT           DESCRIPTION
+growth_rate         int                         32                how many filters to add each layer (k in paper). Default: 32.
+block_config        Tuple[int, int, int, int]   (6, 12, 24, 16)   how many layers in each pooling block. Default: (6, 12, 24, 16).
+num_init_features   int                         64                number of filters in the first Conv2d. Default: 64.
+bn_size             int                         4                 multiplicative factor for number of bottleneck layers (i.e. bn_size * k features in the bottleneck layer). Default: 4.
+drop_rate           float                       0.0               dropout rate after each dense layer. Default: 0.
+in_channels         int                         3                 number of input channels. Default: 3.
+num_classes         int                         1000              number of classification classes. Default: 1000.
+
+Source code in mindcv/models/densenet.py
class DenseNet(nn.Cell):
+    r"""Densenet-BC model class, based on
+    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
+
+    Args:
+        growth_rate: how many filters to add each layer (`k` in paper). Default: 32.
+        block_config: how many layers in each pooling block. Default: (6, 12, 24, 16).
+        num_init_features: number of filters in the first Conv2d. Default: 64.
+        bn_size (int): multiplicative factor for number of bottleneck layers
+          (i.e. bn_size * k features in the bottleneck layer). Default: 4.
+        drop_rate: dropout rate after each dense layer. Default: 0.
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        growth_rate: int = 32,
+        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
+        num_init_features: int = 64,
+        bn_size: int = 4,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        layers = OrderedDict()
+        # first Conv2d
+        num_features = num_init_features
+        layers["conv0"] = nn.Conv2d(in_channels, num_features, kernel_size=7, stride=2, pad_mode="pad", padding=3)
+        layers["norm0"] = nn.BatchNorm2d(num_features)
+        layers["relu0"] = nn.ReLU()
+        layers["pool0"] = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        ])
+
+        # DenseBlock
+        for i, num_layers in enumerate(block_config):
+            block = _DenseBlock(
+                num_layers=num_layers,
+                num_input_features=num_features,
+                bn_size=bn_size,
+                growth_rate=growth_rate,
+                drop_rate=drop_rate,
+            )
+            layers[f"denseblock{i + 1}"] = block
+            num_features += num_layers * growth_rate
+            if i != len(block_config) - 1:
+                transition = _Transition(num_features, num_features // 2)
+                layers[f"transition{i + 1}"] = transition
+                num_features = num_features // 2
+
+        # final bn+ReLU
+        layers["norm5"] = nn.BatchNorm2d(num_features)
+        layers["relu5"] = nn.ReLU()
+
+        self.num_features = num_features
+        self.features = nn.SequentialCell(layers)
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
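+A small worked sketch of the channel bookkeeping in the constructor above, using the densenet121 configuration; each dense layer adds growth_rate channels and each transition halves them, and the printed value matches the classifier input width:
+
+growth_rate = 32
+block_config = (6, 12, 24, 16)
+num_features = 64  # num_init_features
+for i, num_layers in enumerate(block_config):
+    num_features += num_layers * growth_rate      # each dense layer adds growth_rate channels
+    if i != len(block_config) - 1:
+        num_features = num_features // 2          # transition layer halves the channels
+print(num_features)  # 1024 -> input width of nn.Dense(self.num_features, num_classes)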
+
+mindcv.models.densenet.densenet121(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get 121 layers DenseNet model.
+Refer to the base class models.DenseNet for more details.
+
+Source code in mindcv/models/densenet.py
@register_model
+def densenet121(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 121 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet121"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.densenet.densenet161(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get 161 layers DenseNet model.
+Refer to the base class models.DenseNet for more details.
+
+Source code in mindcv/models/densenet.py
@register_model
+def densenet161(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 161 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet161"]
+    model = DenseNet(growth_rate=48, block_config=(6, 12, 36, 24), num_init_features=96, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.densenet.densenet169(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get 169 layers DenseNet model.
+Refer to the base class models.DenseNet for more details.
+
+Source code in mindcv/models/densenet.py
@register_model
+def densenet169(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 169 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet169"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 32, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+mindcv.models.densenet.densenet201(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+Get 201 layers DenseNet model.
+Refer to the base class models.DenseNet for more details.
+
+Source code in mindcv/models/densenet.py
@register_model
+def densenet201(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DenseNet:
+    """Get 201 layers DenseNet model.
+     Refer to the base class `models.DenseNet` for more details."""
+    default_cfg = default_cfgs["densenet201"]
+    model = DenseNet(growth_rate=32, block_config=(6, 12, 48, 32), num_init_features=64, in_channels=in_channels,
+                     num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+dpn
+
+mindcv.models.dpn.DPN
+
+Bases: nn.Cell
+
+DPN model class, based on
+"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>
+PARAMETER          TYPE                        DEFAULT             DESCRIPTION
+num_init_channel   int                         64                  output channels of the first conv block. Default: 64.
+k_r                int                         96                  base width used to compute the bottleneck channels of each stage (r = k_r * bw / 256). Default: 96.
+g                  int                         32                  number of groups in the conv2d. Default: 32.
+k_sec              Tuple[int, int, int, int]   (3, 4, 20, 3)       number of blocks in each stage. Default: (3, 4, 20, 3).
+inc_sec            Tuple[int, int, int, int]   (16, 32, 24, 128)   dense-path channel increment added by each block in each stage. Default: (16, 32, 24, 128).
+in_channels        int                         3                   number of input channels. Default: 3.
+num_classes        int                         1000                number of classification classes. Default: 1000.
+
+Source code in mindcv/models/dpn.py
class DPN(nn.Cell):
+    r"""DPN model class, based on
+    `"Dual Path Networks" <https://arxiv.org/pdf/1707.01629.pdf>`_
+
+    Args:
+        num_init_channel: int type, the output channel of first blocks. Default: 64.
+        k_r: int type, the first channel of each stage. Default: 96.
+        g: int type,number of group in the conv2d. Default: 32.
+        k_sec Tuple[int]: multiplicative factor for number of bottleneck layers. Default: 4.
+        inc_sec Tuple[int]: the first output channel in each stage. Default: (16, 32, 24, 128).
+        in_channels: int type, number of input channels. Default: 3.
+        num_classes: int type, number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        num_init_channel: int = 64,
+        k_r: int = 96,
+        g: int = 32,
+        k_sec: Tuple[int, int, int, int] = (3, 4, 20, 3),
+        inc_sec: Tuple[int, int, int, int] = (16, 32, 24, 128),
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ):
+        super().__init__()
+        blocks = OrderedDict()
+
+        # conv1
+        blocks["conv1"] = nn.SequentialCell(OrderedDict([
+            ("conv", nn.Conv2d(in_channels, num_init_channel, kernel_size=7, stride=2, pad_mode="pad", padding=3)),
+            ("norm", nn.BatchNorm2d(num_init_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+            ("maxpool", nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")),
+        ]))
+
+        # conv2
+        bw = 256
+        inc = inc_sec[0]
+        r = int((k_r * bw) / 256)
+        blocks["conv2_1"] = DualPathBlock(num_init_channel, r, r, bw, inc, g, "proj", False)
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[0] + 1):
+            blocks[f"conv2_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv3
+        bw = 512
+        inc = inc_sec[1]
+        r = int((k_r * bw) / 256)
+        blocks["conv3_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[1] + 1):
+            blocks[f"conv3_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv4
+        bw = 1024
+        inc = inc_sec[2]
+        r = int((k_r * bw) / 256)
+        blocks["conv4_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[2] + 1):
+            blocks[f"conv4_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        # conv5
+        bw = 2048
+        inc = inc_sec[3]
+        r = int((k_r * bw) / 256)
+        blocks["conv5_1"] = DualPathBlock(in_channel, r, r, bw, inc, g, "down")
+        in_channel = bw + 3 * inc
+        for i in range(2, k_sec[3] + 1):
+            blocks[f"conv5_{i}"] = DualPathBlock(in_channel, r, r, bw, inc, g, "normal")
+            in_channel += inc
+
+        self.features = nn.SequentialCell(blocks)
+        self.conv5_x = nn.SequentialCell(OrderedDict([
+            ("norm", nn.BatchNorm2d(in_channel, eps=1e-3, momentum=0.9)),
+            ("relu", nn.ReLU()),
+        ]))
+        self.avgpool = GlobalAvgPooling()
+        self.classifier = nn.Dense(in_channel, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_feature(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        x = ops.concat(x, axis=1)
+        x = self.conv5_x(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_feature(x)
+        x = self.forward_head(x)
+        return x
+
+
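A minimal usage sketch for the class above, assuming a working MindSpore install and an illustrative 224x224 RGB input (the import path follows the module shown here; everything else is an example, not part of the source):

import numpy as np
import mindspore as ms
from mindcv.models.dpn import DPN

# The default arguments match the configuration used by the dpn92 factory below.
net = DPN(num_init_channel=64, k_r=96, g=32, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128))
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = net(x)      # construct() -> forward_feature() -> forward_head()
print(logits.shape)  # expected: (1, 1000) with the default num_classes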
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn107(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 107 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py (lines 304-315)
@register_model
+def dpn107(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 107 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn107"]
+    model = DPN(num_init_channel=128, k_r=200, g=50, k_sec=(4, 8, 20, 3), inc_sec=(20, 64, 64, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
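Because each factory is decorated with @register_model, the network can also be built by its registered name. A short sketch, assuming the top-level mindcv.create_model helper and that the factory is re-exported under mindcv.models (both are standard in MindCV but not shown in this file):

import mindcv

# Direct call to the factory above ...
model = mindcv.models.dpn107(pretrained=False, num_classes=10)

# ... or by registered name through the model registry.
model = mindcv.create_model("dpn107", pretrained=False, num_classes=10)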
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn131(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 131 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py (lines 290-301)
@register_model
+def dpn131(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 131 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn131"]
+    model = DPN(num_init_channel=128, k_r=160, g=40, k_sec=(4, 8, 28, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn92(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 92 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py (lines 262-273)
@register_model
+def dpn92(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 92 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn92"]
+    model = DPN(num_init_channel=64, k_r=96, g=32, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.dpn.dpn98(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 98 layers DPN model. +Refer to the base class models.DPN for more details.

+ +
+ Source code in mindcv/models/dpn.py (lines 276-287)
@register_model
+def dpn98(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> DPN:
+    """Get 98 layers DPN model.
+     Refer to the base class `models.DPN` for more details."""
+    default_cfg = default_cfgs["dpn98"]
+    model = DPN(num_init_channel=96, k_r=160, g=40, k_sec=(3, 6, 20, 3), inc_sec=(16, 32, 32, 128),
+                num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

edgenext

+ + +
+ + + +

+ mindcv.models.edgenext.EdgeNeXt + + +

+ + +
+

+ Bases: nn.Cell

+ + +

EdgeNeXt model class, based on +"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_chans +
+

number of input channels. Default: 3

+
+

+

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + DEFAULT: + 1000 + +

+
depths +
+

number of blocks in each stage. Default: [3, 3, 9, 3]

+
+

+ + DEFAULT: + [3, 3, 9, 3] + +

+
dims +
+

the middle dim of each layer. Default: [24, 48, 88, 168]

+
+

+ + DEFAULT: + [24, 48, 88, 168] + +

+
global_block +
+

number of global (SDTA) blocks in each stage. Default: [0, 0, 0, 3]

+
+

+ + DEFAULT: + [0, 0, 0, 3] + +

+
global_block_type +
+

type of global block. Default: ['None', 'None', 'None', 'SDTA']

+
+

+ + DEFAULT: + ['None', 'None', 'None', 'SDTA'] + +

+
drop_path_rate +
+

Stochastic Depth. Default: 0.

+
+

+ + DEFAULT: + 0.0 + +

+
layer_scale_init_value +
+

value of layer scale initialization. Default: 1e-6

+
+

+ + DEFAULT: + 1e-06 + +

+
head_init_scale +
+

scale of head initialization. Default: 1.

+
+

+ + DEFAULT: + 1.0 + +

+
expan_ratio +
+

ratio of expansion. Default: 4

+
+

+ + DEFAULT: + 4 + +

+
kernel_sizes +
+

kernel sizes of different stages. Default: [7, 7, 7, 7]

+
+

+ + DEFAULT: + [7, 7, 7, 7] + +

+
heads +
+

number of attention heads. Default: [8, 8, 8, 8]

+
+

+ + DEFAULT: + [8, 8, 8, 8] + +

+
use_pos_embd_xca +
+

use position embedding in xca or not. Default: [False, False, False, False]

+
+

+ + DEFAULT: + [False, False, False, False] + +

+
use_pos_embd_global +
+

use position embedding globally or not. Default: False

+
+

+ + DEFAULT: + False + +

+
d2_scales +
+

number of channel splits in each SDTA block. Default: [2, 3, 4, 5]

+
+

+ + DEFAULT: + [2, 3, 4, 5] + +

+
+ +
+ Source code in mindcv/models/edgenext.py (lines 296-400)
class EdgeNeXt(nn.Cell):
+    r"""EdgeNeXt model class, based on
+    `"Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision" <https://arxiv.org/abs/2206.10589>`_
+
+    Args:
+        in_chans: number of input channels. Default: 3
+        num_classes: number of classification classes. Default: 1000
+        depths: number of blocks in each stage. Default: [3, 3, 9, 3]
+        dims: the middle dim of each layer. Default: [24, 48, 88, 168]
+        global_block: number of global (SDTA) blocks in each stage. Default: [0, 0, 0, 3]
+        global_block_type: type of global block. Default: ['None', 'None', 'None', 'SDTA']
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: value of layer scale initialization. Default: 1e-6
+        head_init_scale: scale of head initialization. Default: 1.
+        expan_ratio: ratio of expansion. Default: 4
+        kernel_sizes: kernel sizes of different stages. Default: [7, 7, 7, 7]
+        heads: number of attention heads. Default: [8, 8, 8, 8]
+        use_pos_embd_xca: use position embedding in xca or not. Default: [False, False, False, False]
+        use_pos_embd_global: use position embedding globally or not. Default: False
+        d2_scales: number of channel splits in each SDTA block. Default: [2, 3, 4, 5]
+    """
+    def __init__(self, in_chans=3, num_classes=1000,
+                 depths=[3, 3, 9, 3], dims=[24, 48, 88, 168],
+                 global_block=[0, 0, 0, 3], global_block_type=["None", "None", "None", "SDTA"],
+                 drop_path_rate=0., layer_scale_init_value=1e-6, head_init_scale=1., expan_ratio=4,
+                 kernel_sizes=[7, 7, 7, 7], heads=[8, 8, 8, 8], use_pos_embd_xca=[False, False, False, False],
+                 use_pos_embd_global=False, d2_scales=[2, 3, 4, 5], **kwargs):
+        super().__init__()
+        for g in global_block_type:
+            assert g in ["None", "SDTA"]
+        if use_pos_embd_global:
+            self.pos_embd = PositionalEncodingFourier(dim=dims[0])
+        else:
+            self.pos_embd = None
+        self.downsample_layers = nn.CellList()  # stem and 3 intermediate downsampling conv layers
+        stem = nn.SequentialCell(
+            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4, has_bias=True),
+            LayerNorm((dims[0],), epsilon=1e-6, norm_axis=1),
+        )
+        self.downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.SequentialCell(
+                LayerNorm((dims[i],), epsilon=1e-6, norm_axis=1),
+                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2, has_bias=True),
+            )
+            self.downsample_layers.append(downsample_layer)
+
+        self.stages = nn.CellList()  # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates = list(np.linspace(0, drop_path_rate, sum(depths)))
+        cur = 0
+        for i in range(4):
+            stage_blocks = []
+            for j in range(depths[i]):
+                if j > depths[i] - global_block[i] - 1:
+                    if global_block_type[i] == "SDTA":
+                        stage_blocks.append(SDTAEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                        expan_ratio=expan_ratio, scales=d2_scales[i],
+                                                        use_pos_emb=use_pos_embd_xca[i], num_heads=heads[i]))
+                    else:
+                        raise NotImplementedError
+                else:
+                    stage_blocks.append(ConvEncoder(dim=dims[i], drop_path=dp_rates[cur + j],
+                                                    layer_scale_init_value=layer_scale_init_value,
+                                                    expan_ratio=expan_ratio, kernel_size=kernel_sizes[i]))
+
+            self.stages.append(nn.SequentialCell(*stage_blocks))
+            cur += depths[i]
+        self.norm = nn.LayerNorm((dims[-1],), epsilon=1e-6)  # Final norm layer
+        self.head = nn.Dense(dims[-1], num_classes)
+
+        # self.head_dropout = Dropout(kwargs["classifier_dropout"])
+        self.head_dropout = Dropout(p=0.0)
+        self.head_init_scale = head_init_scale
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Dense, nn.Conv2d)):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, (nn.LayerNorm)):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+        self.head.weight.set_data(self.head.weight * self.head_init_scale)
+        self.head.bias.set_data(self.head.bias * self.head_init_scale)
+
+    def forward_features(self, x):
+        x = self.downsample_layers[0](x)
+        x = self.stages[0](x)
+        if self.pos_embd is not None:
+            B, C, H, W = x.shape
+            x = x + self.pos_embd(B, H, W)
+        for i in range(1, 4):
+            x = self.downsample_layers[i](x)
+            x = self.stages[i](x)
+        return self.norm(x.mean([-2, -1]))  # Global average pooling, (N, C, H, W) -> (N, C)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(self.head_dropout(x))
+        return x
+
+
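The split between convolutional and global blocks follows the j > depths[i] - global_block[i] - 1 test in the constructor: the last global_block[i] blocks of stage i become SDTA encoders, the rest ConvEncoders. A standalone sketch of that rule using the defaults above (plain Python, for illustration only):

depths = [3, 3, 9, 3]
global_block = [0, 0, 0, 3]

for i in range(4):
    kinds = []
    for j in range(depths[i]):
        # Same condition as in EdgeNeXt.__init__ above.
        kinds.append("SDTA" if j > depths[i] - global_block[i] - 1 else "Conv")
    print(f"stage {i}: {kinds}")
# stages 0-2 contain only ConvEncoder blocks; stage 3 is ['SDTA', 'SDTA', 'SDTA'].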
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_base(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_base model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py (lines 472-491)
@register_model
+def edgenext_base(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_base model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_base"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[80, 160, 288, 584],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py (lines 450-469)
@register_model
+def edgenext_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[48, 96, 160, 304],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_x_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py (lines 427-447)
@register_model
+def edgenext_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_x_small model.
+    Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_x_small"]
+    model = EdgeNeXt(
+        depths=[3, 3, 9, 3],
+        dims=[32, 64, 100, 192],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=["None", "SDTA", "SDTA", "SDTA"],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.edgenext.edgenext_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get edgenext_xx_small model. +Refer to the base class models.EdgeNeXt for more details.

+ +
+ Source code in mindcv/models/edgenext.py (lines 403-424)
@register_model
+def edgenext_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> EdgeNeXt:
+    """Get edgenext_xx_small model.
+        Refer to the base class `models.EdgeNeXt` for more details."""
+    default_cfg = default_cfgs["edgenext_xx_small"]
+    model = EdgeNeXt(
+        depths=[2, 2, 6, 2],
+        dims=[24, 48, 88, 168],
+        expan_ratio=4,
+        num_classes=num_classes,
+        global_block=[0, 1, 1, 1],
+        global_block_type=['None', 'SDTA', 'SDTA', 'SDTA'],
+        use_pos_embd_xca=[False, True, False, False],
+        kernel_sizes=[3, 5, 7, 9],
+        heads=[4, 4, 4, 4],
+        d2_scales=[2, 2, 3, 4],
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
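For quick comparison, the depths and stage widths passed by the four EdgeNeXt factories above can be collected in one place (values copied from the calls; the dictionary itself is only an illustration, not part of the library):

edgenext_variants = {
    # name:              (depths,        dims)
    "edgenext_xx_small": ([2, 2, 6, 2], [24, 48, 88, 168]),
    "edgenext_x_small":  ([3, 3, 9, 3], [32, 64, 100, 192]),
    "edgenext_small":    ([3, 3, 9, 3], [48, 96, 160, 304]),
    "edgenext_base":     ([3, 3, 9, 3], [80, 160, 288, 584]),
}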
+
+ +

efficientnet

+ + +
+ + + +

+ mindcv.models.efficientnet.EfficientNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

EfficientNet architecture. +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
arch +
+

The name of the model.

+
+

+ + TYPE: + str + +

+
dropout_rate +
+

The dropout rate of efficientnet.

+
+

+ + TYPE: + float + +

+
width_mult +
+

The ratio of the channel. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
depth_mult +
+

The ratio of num_layers. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
in_channels +
+

The input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

The number of class. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
inverted_residual_setting +
+

The settings of block. +Default: None.

+
+

+ + TYPE: + Sequence[Union[MBConvConfig, FusedMBConvConfig]] + + + DEFAULT: + None + +

+
drop_path_prob +
+

The drop path rate of MBConv. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
norm_layer +
+

The normalization layer. Default: None.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + None + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, 1000).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 275-476)
class EfficientNet(nn.Cell):
+    """
+    EfficientNet architecture.
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        arch (str): The name of the model.
+        dropout_rate (float): The dropout rate of efficientnet.
+        width_mult (float): The ratio of the channel. Default: 1.0.
+        depth_mult (float): The ratio of num_layers. Default: 1.0.
+        in_channels (int): The input channels. Default: 3.
+        num_classes (int): The number of class. Default: 1000.
+        inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]], optional): The settings of block.
+            Default: None.
+        drop_path_prob (float): The drop path rate of MBConv. Default: 0.2.
+        norm_layer (nn.Cell, optional): The normalization layer. Default: None.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 1000)`.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        dropout_rate: float,
+        width_mult: float = 1.0,
+        depth_mult: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        inverted_residual_setting: Optional[Sequence[Union[MBConvConfig, FusedMBConvConfig]]] = None,
+        drop_path_prob: float = 0.2,
+        norm_layer: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        self.last_channel = None
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+            if width_mult >= 1.6:
+                norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.99)
+
+        layers: List[nn.Cell] = []
+
+        if not inverted_residual_setting:
+            if arch.startswith("efficientnet_b"):
+                bneck_conf = partial(MBConvConfig, width_cnf=width_mult, depth_cnf=depth_mult)
+                inverted_residual_setting = [
+                    bneck_conf(1, 3, 1, 32, 16, 1),
+                    bneck_conf(6, 3, 2, 16, 24, 2),
+                    bneck_conf(6, 5, 2, 24, 40, 2),
+                    bneck_conf(6, 3, 2, 40, 80, 3),
+                    bneck_conf(6, 5, 1, 80, 112, 3),
+                    bneck_conf(6, 5, 2, 112, 192, 4),
+                    bneck_conf(6, 3, 1, 192, 320, 1),
+                ]
+            elif arch.startswith("efficientnet_v2_s"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 2),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 4),
+                    FusedMBConvConfig(4, 3, 2, 48, 64, 4),
+                    MBConvConfig(4, 3, 2, 64, 128, 6),
+                    MBConvConfig(6, 3, 1, 128, 160, 9),
+                    MBConvConfig(6, 3, 2, 160, 256, 15),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_m"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 24, 24, 3),
+                    FusedMBConvConfig(4, 3, 2, 24, 48, 5),
+                    FusedMBConvConfig(4, 3, 2, 48, 80, 5),
+                    MBConvConfig(4, 3, 2, 80, 160, 7),
+                    MBConvConfig(6, 3, 1, 160, 176, 14),
+                    MBConvConfig(6, 3, 2, 176, 304, 18),
+                    MBConvConfig(6, 3, 1, 304, 512, 5),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_l"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 7),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 7),
+                    MBConvConfig(4, 3, 2, 96, 192, 10),
+                    MBConvConfig(6, 3, 1, 192, 224, 19),
+                    MBConvConfig(6, 3, 2, 224, 384, 25),
+                    MBConvConfig(6, 3, 1, 384, 640, 7),
+                ]
+                self.last_channel = 1280
+            elif arch.startswith("efficientnet_v2_xl"):
+                inverted_residual_setting = [
+                    FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+                    FusedMBConvConfig(4, 3, 2, 32, 64, 8),
+                    FusedMBConvConfig(4, 3, 2, 64, 96, 8),
+                    MBConvConfig(4, 3, 2, 96, 192, 16),
+                    MBConvConfig(6, 3, 1, 192, 256, 24),
+                    MBConvConfig(6, 3, 2, 256, 512, 32),
+                    MBConvConfig(6, 3, 1, 512, 640, 8),
+                ]
+                self.last_channel = 1280
+
+        # building first layer
+        firstconv_output_channels = inverted_residual_setting[0].input_channels
+        layers.extend([
+            nn.Conv2d(in_channels, firstconv_output_channels, kernel_size=3, stride=2),
+            norm_layer(firstconv_output_channels),
+            Swish(),
+        ])
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=firstconv_output_channels, reduction=total_reduction,
+                                  name=f'features.{len(layers) - 1}')]
+
+        # building MBConv blocks
+        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
+        stage_block_id = 0
+
+        # cnf is the settings of block
+        for cnf in inverted_residual_setting:
+            stage: List[nn.Cell] = []
+
+            # cnf.num_layers is the num of the same block
+            for _ in range(cnf.num_layers):
+                # copy to avoid modifications. shallow copy is enough
+                block_cnf = copy.copy(cnf)
+
+                block = MBConv
+
+                if "FusedMBConvConfig" in str(type(block_cnf)):
+                    block = FusedMBConv
+
+                # overwrite info if not the first conv in the stage
+                if stage:
+                    block_cnf.input_channels = block_cnf.out_channels
+                    block_cnf.stride = 1
+
+                # adjust dropout rate of blocks based on the depth of the stage block
+                sd_prob = drop_path_prob * float(stage_block_id) / total_stage_blocks
+
+                total_reduction *= block_cnf.stride
+
+                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage_block_id += 1
+
+            layers.append(nn.SequentialCell(stage))
+
+            self.feature_info.append(dict(chs=cnf.out_channels, reduction=total_reduction,
+                                          name=f'features.{len(layers) - 1}'))
+
+        # building last several layers
+        lastconv_input_channels = inverted_residual_setting[-1].out_channels
+        lastconv_output_channels = self.last_channel if self.last_channel is not None else 4 * lastconv_input_channels
+        layers.extend([
+            nn.Conv2d(lastconv_input_channels, lastconv_output_channels, kernel_size=1),
+            norm_layer(lastconv_output_channels),
+            Swish(),
+        ])
+
+        self.feature_info.append(dict(chs=lastconv_output_channels, reduction=total_reduction,
+                                      name=f'features.{len(layers) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(layers)
+        self.avgpool = GlobalAvgPooling()
+        self.dropout = Dropout(p=dropout_rate)
+        self.mlp_head = nn.Dense(lastconv_output_channels, num_classes)
+        self._initialize_weights()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+
+        x = self.avgpool(x)
+
+        if self.training:
+            x = self.dropout(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.mlp_head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        """construct"""
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                init_range = 1.0 / np.sqrt(cell.weight.shape[0])
+                cell.weight.set_data(weight_init.initializer(Uniform(init_range), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            if isinstance(cell, nn.Conv2d):
+                out_channel, _, kernel_size_h, kernel_size_w = cell.weight.shape
+                stddev = np.sqrt(2 / int(out_channel * kernel_size_h * kernel_size_w))
+                cell.weight.set_data(
+                    weight_init.initializer(Normal(sigma=stddev), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+
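The per-block stochastic-depth rate is set by the sd_prob line above and grows linearly with the block index across the whole network. A small numeric sketch (plain Python; the 16-block total is illustrative, the real value is sum(cnf.num_layers)):

drop_path_prob = 0.2
total_stage_blocks = 16  # illustrative
for stage_block_id in range(total_stage_blocks):
    sd_prob = drop_path_prob * float(stage_block_id) / total_stage_blocks
    # first block: 0.0, last block: 0.1875 -> early blocks are dropped less often than deep ones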
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.efficientnet.EfficientNet.construct(x) + +

+ + +
+ +

construct

+ +
+ Source code in mindcv/models/efficientnet.py (lines 456-459)
def construct(self, x: Tensor) -> Tensor:
+    """construct"""
+    x = self.forward_features(x)
+    return self.forward_head(x)
+
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B0 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 497-514)
@register_model
+def efficientnet_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B0 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b0", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B1 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 517-534)
@register_model
+def efficientnet_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B1 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b1", 1.0, 1.1, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B2 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 537-554)
@register_model
+def efficientnet_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B2 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b2", 1.1, 1.2, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B3 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 557-574)
@register_model
+def efficientnet_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B3 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b3", 1.2, 1.4, 0.3, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B4 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 577-594)
@register_model
+def efficientnet_b4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B4 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b4", 1.4, 1.8, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B5 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 597-614)
@register_model
+def efficientnet_b5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B5 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b5", 1.6, 2.2, 0.4, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b6(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B6 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 617-634)
@register_model
+def efficientnet_b6(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B6 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b6", 1.8, 2.6, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_b7(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs a EfficientNet B7 architecture from +EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 637-654)
@register_model
+def efficientnet_b7(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNet B7 architecture from
+    `EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks <https://arxiv.org/abs/1905.11946>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_b7", 2.0, 3.1, 0.5, in_channels, num_classes, pretrained, **kwargs)
+
+
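The eight B-series factories above differ only in the (width_mult, depth_mult, dropout_rate) triple forwarded to _efficientnet; collected here for reference (values copied from the calls above; the dictionary itself is only an illustration):

efficientnet_b_coeffs = {
    # name:            (width_mult, depth_mult, dropout_rate)
    "efficientnet_b0": (1.0, 1.0, 0.2),
    "efficientnet_b1": (1.0, 1.1, 0.2),
    "efficientnet_b2": (1.1, 1.2, 0.3),
    "efficientnet_b3": (1.2, 1.4, 0.3),
    "efficientnet_b4": (1.4, 1.8, 0.4),
    "efficientnet_b5": (1.6, 2.2, 0.4),
    "efficientnet_b6": (1.8, 2.6, 0.5),
    "efficientnet_b7": (2.0, 3.1, 0.5),
}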
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-L architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 697-714)
@register_model
+def efficientnet_v2_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-L architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_l", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-M architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 677-694)
@register_model
+def efficientnet_v2_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-M architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_m", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-S architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 657-674)
@register_model
+def efficientnet_v2_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-S architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_s", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.efficientnet.efficientnet_v2_xl(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Constructs an EfficientNetV2-XL architecture from EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

If True, returns a model pretrained on IMAGENET. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

The numbers of classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

The number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+
+ Outputs +

Tensor of shape :math:(N, CLASSES_{out}).

+
+
+ Source code in mindcv/models/efficientnet.py (lines 717-734)
@register_model
+def efficientnet_v2_xl(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-XL architecture from
+    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pretrained on IMAGENET. Default: False.
+        num_classes (int): The number of classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`.
+    """
+    return _efficientnet("efficientnet_v2_xl", 1.0, 1.0, 0.2, in_channels, num_classes, pretrained, **kwargs)
+
+
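Unlike the B-series, the four V2 factories all forward (1.0, 1.0, 0.2); what actually distinguishes the variants is the per-stage block table selected by the arch prefix inside EfficientNet.__init__ (see the class source earlier on this page). Summarised from those branches (illustrative dictionary only, not part of the library):

efficientnet_v2_stages = {
    # name: block type per stage, in order
    "efficientnet_v2_s":  ["Fused", "Fused", "Fused", "MB", "MB", "MB"],
    "efficientnet_v2_m":  ["Fused", "Fused", "Fused", "MB", "MB", "MB", "MB"],
    "efficientnet_v2_l":  ["Fused", "Fused", "Fused", "MB", "MB", "MB", "MB"],
    "efficientnet_v2_xl": ["Fused", "Fused", "Fused", "MB", "MB", "MB", "MB"],
}  # every V2 variant also sets last_channel = 1280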
+
+ +

features

+

ghostnet

+ + +
+ + + +

+ mindcv.models.ghostnet.GhostNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

GhostNet model class, based on +"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>_.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
width +
+

width multiplier applied to the hidden channels in each block. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

dropout probability applied to the features before the classifier. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
+ +
+ Source code in mindcv/models/ghostnet.py (lines 177-295)
class GhostNet(nn.Cell):
+    r"""GhostNet model class, based on
+    `"GhostNet: More Features from Cheap Operations " <https://arxiv.org/abs/1911.11907>`_.
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        width: width multiplier applied to the hidden channels in each block. Default: 1.0.
+        in_channels: number of input channels. Default: 3.
+        drop_rate: dropout probability applied to the features before the classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        width: float = 1.0,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        # setting of inverted residual blocks
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        self.cfgs = [
+            # k, t, c, SE, s
+            # stage1
+            [[3, 16, 16, 0, 1]],
+            # stage2
+            [[3, 48, 24, 0, 2]],
+            [[3, 72, 24, 0, 1]],
+            # stage3
+            [[5, 72, 40, 0.25, 2]],
+            [[5, 120, 40, 0.25, 1]],
+            # stage4
+            [[3, 240, 80, 0, 2]],
+            [[3, 200, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 184, 80, 0, 1],
+             [3, 480, 112, 0.25, 1],
+             [3, 672, 112, 0.25, 1]
+             ],
+            # stage5
+            [[5, 672, 160, 0.25, 2]],
+            [[5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1],
+             [5, 960, 160, 0, 1],
+             [5, 960, 160, 0.25, 1]
+             ]
+        ]
+
+        # building first layer
+        stem_chs = make_divisible(16 * width, 4)
+        self.conv_stem = nn.Conv2d(in_channels, stem_chs, 3, 2, pad_mode="pad", padding=1, has_bias=False)
+        self.bn1 = nn.BatchNorm2d(stem_chs)
+        self.act1 = nn.ReLU()
+        prev_chs = stem_chs
+
+        # building inverted residual blocks
+        stages = []
+        for cfg in self.cfgs:
+            layers = []
+            for k, exp_size, c, se_ratio, s in cfg:
+                out_chs = make_divisible(c * width, 4)
+                mid_chs = make_divisible(exp_size * width, 4)
+                layers.append(GhostBottleneck(prev_chs, mid_chs, out_chs, k, s, se_ratio=se_ratio))
+                prev_chs = out_chs
+            stages.append(nn.SequentialCell(layers))
+
+        out_chs = make_divisible(exp_size * width, 4)
+        stages.append(ConvBnAct(prev_chs, out_chs, 1))
+        prev_chs = out_chs
+
+        self.blocks = nn.SequentialCell(stages)
+
+        # building last several layers
+        self.num_features = out_chs = 1280
+        self.global_pool = GlobalAvgPooling(keep_dims=True)
+        self.conv_head = nn.Conv2d(prev_chs, out_chs, 1, 1, pad_mode="pad", padding=0, has_bias=True)
+        self.act2 = nn.ReLU()
+        self.flatten = nn.Flatten()
+        if self.drop_rate > 0.0:
+            self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(out_chs, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        x = self.act1(x)
+        x = self.blocks(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.conv_head(x)
+        x = self.act2(x)
+        x = self.flatten(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
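Example (sketch): the class splits inference into forward_features and forward_head, so the backbone can be probed on its own. Assuming MindSpore is installed; the shapes in the comments are expectations for a 224x224 input at width 1.0 (stride-32 backbone, 960 output channels), not values quoted from this page.

import numpy as np
import mindspore as ms
from mindcv.models.ghostnet import GhostNet

net = GhostNet(num_classes=1000, width=1.0, in_channels=3, drop_rate=0.2)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
feats = net.forward_features(x)   # stem + GhostBottleneck stages + final ConvBnAct
logits = net.forward_head(feats)  # global pool -> conv_head -> flatten -> classifier
print(feats.shape, logits.shape)  # roughly (1, 960, 7, 7) and (1, 1000)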
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-0.5x

+ +
+ Source code in mindcv/models/ghostnet.py +
@register_model
+def ghostnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-0.5x """
+    default_cfg = default_cfgs["ghostnet_050"]
+    model = GhostNet(width=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.0x

+ +
+ Source code in mindcv/models/ghostnet.py +
@register_model
+def ghostnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.0x """
+    default_cfg = default_cfgs["ghostnet_100"]
+    model = GhostNet(width=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.ghostnet.ghostnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

GhostNet-1.3x

+ +
+ Source code in mindcv/models/ghostnet.py +
@register_model
+def ghostnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """ GhostNet-1.3x """
+    default_cfg = default_cfgs["ghostnet_130"]
+    model = GhostNet(width=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
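The three registered constructors above differ only in the width multiplier handed to GhostNet (0.5, 1.0, 1.3), which rescales every hidden channel count through make_divisible. A comparison sketch, assuming the usual MindSpore idiom of counting elements of trainable_params(); the printed numbers are whatever the build produces, not figures quoted from this page.

from mindcv.models.ghostnet import ghostnet_050, ghostnet_100, ghostnet_130

for build in (ghostnet_050, ghostnet_100, ghostnet_130):
    net = build(pretrained=False, num_classes=1000)
    n_params = sum(int(p.size) for p in net.trainable_params())  # Parameter.size = element count
    print(build.__name__, f"{n_params / 1e6:.2f}M parameters")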

hrnet

+ + +
+ + + +

+ mindcv.models.hrnet.HRNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

HRNet Backbone, based on "Deep High-Resolution Representation Learning for Visual Recognition" <https://arxiv.org/abs/1908.07919>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
stage_cfg +
+

Configuration of the extra blocks. It accepts a dictionary storing the detailed config of each block, which includes num_modules, num_branches, block, num_blocks, num_channels. For a detailed example, please check the implementation of hrnet_w32 and hrnet_w48.

+
+

+ + TYPE: + Dict[str, Dict[str, int]] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/hrnet.py +
class HRNet(nn.Cell):
+    r"""HRNet Backbone, based on
+    `"Deep High-Resolution Representation Learning for Visual Recognition"
+    <https://arxiv.org/abs/1908.07919>`_.
+
+    Args:
+        stage_cfg: Configuration of the extra blocks. It accepts a dictionary
+            storing the detailed config of each block, which includes `num_modules`,
+            `num_branches`, `block`, `num_blocks`, `num_channels`. For a detailed example,
+            please check the implementation of `hrnet_w32` and `hrnet_w48`.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: Number of channels of the input. Default: 3.
+    """
+
+    blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
+
+    def __init__(
+        self,
+        stage_cfg: Dict[str, Dict[str, int]],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+
+        self.stage_cfg = stage_cfg
+        # stem net
+        self.conv1 = nn.Conv2d(
+            in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn1 = nn.BatchNorm2d(64)
+        self.conv2 = nn.Conv2d(
+            64, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad"
+        )
+        self.bn2 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU()
+
+        # stage 1
+        self.stage1_cfg = self.stage_cfg["stage1"]
+        num_channels = self.stage1_cfg["num_channels"][0]
+        num_blocks = self.stage1_cfg["num_blocks"][0]
+        block = self.blocks_dict[self.stage1_cfg["block"]]
+        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)
+
+        # stage 2
+        self.stage2_cfg = self.stage_cfg["stage2"]
+        num_channels = self.stage2_cfg["num_channels"]
+        block = self.blocks_dict[self.stage2_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition1, self.transition1_flags = self._make_transition_layer(
+            [256], num_channels
+        )
+        self.stage2, pre_stage_channels = self._make_stage(
+            self.stage2_cfg, num_channels
+        )
+
+        # stage 3
+        self.stage3_cfg = self.stage_cfg["stage3"]
+        num_channels = self.stage3_cfg["num_channels"]
+        block = self.blocks_dict[self.stage3_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+
+        self.transition2, self.transition2_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage3, pre_stage_channels = self._make_stage(
+            self.stage3_cfg, num_channels
+        )
+
+        # stage 4
+        self.stage4_cfg = self.stage_cfg["stage4"]
+        num_channels = self.stage4_cfg["num_channels"]
+        block = self.blocks_dict[self.stage4_cfg["block"]]
+        num_channels = [
+            num_channels[i] * block.expansion for i in range(len(num_channels))
+        ]
+        self.transition3, self.transition3_flags = self._make_transition_layer(
+            pre_stage_channels, num_channels
+        )
+        self.stage4, pre_stage_channels = self._make_stage(
+            self.stage4_cfg, num_channels
+        )
+
+        # head
+        self.pool = GlobalAvgPooling()
+        self.incre_modules, self.downsample_modules, self.final_layer = self._make_head(
+            pre_stage_channels
+        )
+        self.classifier = nn.Dense(2048, num_classes)
+
+    def _make_head(self, pre_stage_channels: List[int]):
+        head_block = Bottleneck
+        head_channels = [32, 64, 128, 256]
+
+        # increase the #channels on each resolution
+        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
+        incre_modules = list()
+        for i, channels in enumerate(pre_stage_channels):
+            incre_module = self._make_layer(
+                head_block, channels, head_channels[i], 1, stride=1
+            )
+            incre_modules.append(incre_module)
+        incre_modules = nn.CellList(incre_modules)
+
+        # downsample modules
+        downsamp_modules = []
+        for i in range(len(pre_stage_channels) - 1):
+            in_channels = head_channels[i] * head_block.expansion
+            out_channels = head_channels[i + 1] * head_block.expansion
+
+            downsamp_module = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=3,
+                    stride=2,
+                    pad_mode="pad",
+                    padding=1,
+                ),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(),
+            )
+
+            downsamp_modules.append(downsamp_module)
+        downsamp_modules = nn.CellList(downsamp_modules)
+
+        final_layer = nn.SequentialCell(
+            nn.Conv2d(
+                in_channels=head_channels[3] * head_block.expansion,
+                out_channels=2048,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+            ),
+            nn.BatchNorm2d(2048),
+            nn.ReLU(),
+        )
+
+        return incre_modules, downsamp_modules, final_layer
+
+    def _make_transition_layer(
+        self, num_channels_pre_layer: List[int], num_channels_cur_layer: List[int]
+    ) -> Tuple[nn.CellList, List[bool]]:
+        num_branches_cur = len(num_channels_cur_layer)
+        num_branches_pre = len(num_channels_pre_layer)
+
+        transition_layers = []
+        transition_layers_flags = []
+        for i in range(num_branches_cur):
+            if i < num_branches_pre:
+                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+                    transition_layers.append(
+                        nn.SequentialCell(
+                            nn.Conv2d(
+                                num_channels_pre_layer[i],
+                                num_channels_cur_layer[i],
+                                kernel_size=3,
+                                padding=1,
+                                pad_mode="pad",
+                            ),
+                            nn.BatchNorm2d(num_channels_cur_layer[i]),
+                            nn.ReLU(),
+                        )
+                    )
+                    transition_layers_flags.append(True)
+                else:
+                    transition_layers.append(IdentityCell())
+                    transition_layers_flags.append(False)
+            else:
+                conv3x3s = []
+                for j in range(i + 1 - num_branches_pre):
+                    inchannels = num_channels_pre_layer[-1]
+                    outchannels = (
+                        num_channels_cur_layer[i]
+                        if j == i - num_branches_pre
+                        else inchannels
+                    )
+                    conv3x3s.append(
+                        nn.SequentialCell(
+                            [
+                                nn.Conv2d(
+                                    inchannels,
+                                    outchannels,
+                                    kernel_size=3,
+                                    stride=2,
+                                    padding=1,
+                                    pad_mode="pad",
+                                ),
+                                nn.BatchNorm2d(outchannels),
+                                nn.ReLU(),
+                            ]
+                        )
+                    )
+                transition_layers.append(nn.SequentialCell(conv3x3s))
+                transition_layers_flags.append(True)
+
+        return nn.CellList(transition_layers), transition_layers_flags
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        in_channels: int,
+        out_channels: int,
+        blocks: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or in_channels != out_channels * block.expansion:
+            downsample = nn.SequentialCell(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                ),
+                nn.BatchNorm2d(out_channels * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(in_channels, out_channels, stride, down_sample=downsample))
+        for _ in range(1, blocks):
+            layers.append(block(out_channels * block.expansion, out_channels))
+
+        return nn.SequentialCell(layers)
+
+    def _make_stage(
+        self,
+        layer_config: Dict[str, int],
+        num_inchannels: int,
+        multi_scale_output: bool = True,
+    ) -> Tuple[nn.SequentialCell, List[int]]:
+        num_modules = layer_config["num_modules"]
+        num_branches = layer_config["num_branches"]
+        num_blocks = layer_config["num_blocks"]
+        num_channels = layer_config["num_channels"]
+        block = self.blocks_dict[layer_config["block"]]
+
+        modules = []
+        for i in range(num_modules):
+            # multi_scale_output is only used in the last module
+            if not multi_scale_output and i == num_modules - 1:
+                reset_multi_scale_output = False
+            else:
+                reset_multi_scale_output = True
+
+            modules.append(
+                HRModule(
+                    num_branches,
+                    block,
+                    num_blocks,
+                    num_inchannels,
+                    num_channels,
+                    reset_multi_scale_output,
+                )
+            )
+            num_inchannels = modules[-1].num_inchannels
+
+        return nn.SequentialCell(modules), num_inchannels
+
+    def forward_features(self, x: Tensor) -> List[Tensor]:
+        """Perform the feature extraction.
+
+        Args:
+            x: Tensor
+
+        Returns:
+            Extracted feature
+        """
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
+
+        # stage 1
+        x = self.layer1(x)
+
+        # stage 2
+        x_list = []
+        for i in range(self.stage2_cfg["num_branches"]):
+            if self.transition1_flags[i]:
+                x_list.append(self.transition1[i](x))
+            else:
+                x_list.append(x)
+        y_list = self.stage2(x_list)
+
+        # stage 3
+        x_list = []
+        for i in range(self.stage3_cfg["num_branches"]):
+            if self.transition2_flags[i]:
+                x_list.append(self.transition2[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y_list = self.stage3(x_list)
+
+        # stage 4
+        x_list = []
+        for i in range(self.stage4_cfg["num_branches"]):
+            if self.transition3_flags[i]:
+                x_list.append(self.transition3[i](y_list[-1]))
+            else:
+                x_list.append(y_list[i])
+        y = self.stage4(x_list)
+
+        return y
+
+    def forward_head(self, x: List[Tensor]) -> Tensor:
+        y = self.incre_modules[0](x[0])
+        for i in range(len(self.downsample_modules)):
+            y = self.incre_modules[i + 1](x[i + 1]) + self.downsample_modules[i](y)
+
+        y = self.final_layer(y)
+        y = self.pool(y)
+        y = self.classifier(y)
+        return y
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
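stage_cfg is a plain nested dict with one entry per stage. As a sketch, the snippet below rebuilds the hrnet_w32 configuration (shown in full further down) and passes it to the constructor directly, which is exactly what the registered factory does for you.

from mindcv.models.hrnet import HRNet

stage_cfg = dict(
    stage1=dict(num_modules=1, num_branches=1, block="BOTTLENECK", num_blocks=[4], num_channels=[64]),
    stage2=dict(num_modules=1, num_branches=2, block="BASIC", num_blocks=[4, 4], num_channels=[32, 64]),
    stage3=dict(num_modules=4, num_branches=3, block="BASIC", num_blocks=[4, 4, 4], num_channels=[32, 64, 128]),
    stage4=dict(num_modules=3, num_branches=4, block="BASIC", num_blocks=[4, 4, 4, 4], num_channels=[32, 64, 128, 256]),
)
net = HRNet(stage_cfg=stage_cfg, num_classes=1000, in_channels=3)  # equivalent to hrnet_w32(pretrained=False)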
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.hrnet.HRNet.forward_features(x) + +

+ + +
+ +

Perform the feature extraction.

+ + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
x +
+

Tensor

+
+

+ + TYPE: + Tensor + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + List[Tensor] + + +
+

Extracted feature

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
def forward_features(self, x: Tensor) -> List[Tensor]:
+    """Perform the feature extraction.
+
+    Args:
+        x: Tensor
+
+    Returns:
+        Extracted feature
+    """
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.conv2(x)
+    x = self.bn2(x)
+    x = self.relu(x)
+
+    # stage 1
+    x = self.layer1(x)
+
+    # stage 2
+    x_list = []
+    for i in range(self.stage2_cfg["num_branches"]):
+        if self.transition1_flags[i]:
+            x_list.append(self.transition1[i](x))
+        else:
+            x_list.append(x)
+    y_list = self.stage2(x_list)
+
+    # stage 3
+    x_list = []
+    for i in range(self.stage3_cfg["num_branches"]):
+        if self.transition2_flags[i]:
+            x_list.append(self.transition2[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y_list = self.stage3(x_list)
+
+    # stage 4
+    x_list = []
+    for i in range(self.stage4_cfg["num_branches"]):
+        if self.transition3_flags[i]:
+            x_list.append(self.transition3[i](y_list[-1]))
+        else:
+            x_list.append(y_list[i])
+    y = self.stage4(x_list)
+
+    return y
+
+
+
+ +
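Note that, unlike the other backbones on this page, forward_features returns a list with one tensor per branch (highest to lowest resolution); forward_head then fuses them through the incre/downsample modules before pooling. A probing sketch using the hrnet_w32 factory below; the shapes in the comments are expectations for a 224x224 input, not documented values.

import numpy as np
import mindspore as ms
from mindcv.models.hrnet import hrnet_w32

net = hrnet_w32(pretrained=False)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
branches = net.forward_features(x)
for i, feat in enumerate(branches):
    print(i, feat.shape)                 # expected strides 4/8/16/32 with 32/64/128/256 channels
print(net.forward_head(branches).shape)  # expected (1, 1000)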
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.hrnet.hrnet_w32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get HRNet with width=32 model. Refer to the base class models.HRNet for more details.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether the model is pretrained. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of input channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Union[HRNet, HRNetFeatures] + + +
+

HRNet model

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
@register_model
+def hrnet_w32(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=32 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w32"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[32, 64],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[32, 64, 128],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[32, 64, 128, 256],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
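Because the factory is wrapped with register_model, it is also reachable through the generic model factory; a sketch assuming mindcv.models.create_model is available in this version (it is the entry point used throughout the MindCV tutorials).

from mindcv.models import create_model

# The registry maps the string name to the hrnet_w32 factory above.
net = create_model("hrnet_w32", pretrained=False, num_classes=10, in_channels=3)
print(type(net).__name__)  # HRNet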
+ + +
+ + + +

+mindcv.models.hrnet.hrnet_w48(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get HRNet with width=48 model. Refer to the base class models.HRNet for more details.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
pretrained +
+

Whether the model is pretrained. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
num_classes +
+

number of classification classes. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

Number of input channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Union[HRNet, HRNetFeatures] + + +
+

HRNet model

+
+
+ +
+ Source code in mindcv/models/hrnet.py +
@register_model
+def hrnet_w48(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> Union[HRNet, HRNetFeatures]:
+    """Get HRNet with width=48 model.
+    Refer to the base class `models.HRNet` for more details.
+
+    Args:
+        pretrained: Whether the model is pretrained. Default: False
+        num_classes: number of classification classes. Default: 1000
+        in_channels: Number of input channels. Default: 3
+
+    Returns:
+        HRNet model
+    """
+    default_cfg = default_cfgs["hrnet_w48"]
+    stage_cfg = dict(
+        stage1=dict(
+            num_modules=1,
+            num_branches=1,
+            block="BOTTLENECK",
+            num_blocks=[4],
+            num_channels=[64],
+        ),
+        stage2=dict(
+            num_modules=1,
+            num_branches=2,
+            block="BASIC",
+            num_blocks=[4, 4],
+            num_channels=[48, 96],
+        ),
+        stage3=dict(
+            num_modules=4,
+            num_branches=3,
+            block="BASIC",
+            num_blocks=[4, 4, 4],
+            num_channels=[48, 96, 192],
+        ),
+        stage4=dict(
+            num_modules=3,
+            num_branches=4,
+            block="BASIC",
+            num_blocks=[4, 4, 4, 4],
+            num_channels=[48, 96, 192, 384],
+        ),
+    )
+    model_args = dict(stage_cfg=stage_cfg, num_classes=num_classes, in_channels=in_channels, **kwargs)
+    return _create_hrnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +

inceptionv3

+ + +
+ + + +

+ mindcv.models.inceptionv3.InceptionV3 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Inception v3 model architecture from "Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>_.

+

Note: in contrast to the other models, inception_v3 expects tensors with a size of N x 3 x 299 x 299, so ensure your images are sized accordingly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
aux_logits +
+

use auxiliary classifier or not. Default: True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

dropout rate of the layer before main classifier. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
+ +
+ Source code in mindcv/models/inceptionv3.py +
class InceptionV3(nn.Cell):
+    r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" <https://arxiv.org/abs/1512.00567>`_.
+
+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        aux_logits: use auxiliary classifier or not. Default: True.
+        in_channels: number of channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        aux_logits: bool = True,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        self.aux_logits = aux_logits
+        self.conv1a = BasicConv2d(in_channels, 32, kernel_size=3, stride=2, pad_mode="valid")
+        self.conv2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode="valid")
+        self.conv2b = BasicConv2d(32, 64, kernel_size=3, stride=1)
+        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.conv3b = BasicConv2d(64, 80, kernel_size=1)
+        self.conv4a = BasicConv2d(80, 192, kernel_size=3, pad_mode="valid")
+        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.inception5b = InceptionA(192, pool_features=32)
+        self.inception5c = InceptionA(256, pool_features=64)
+        self.inception5d = InceptionA(288, pool_features=64)
+        self.inception6a = InceptionB(288)
+        self.inception6b = InceptionC(768, channels_7x7=128)
+        self.inception6c = InceptionC(768, channels_7x7=160)
+        self.inception6d = InceptionC(768, channels_7x7=160)
+        self.inception6e = InceptionC(768, channels_7x7=192)
+        if self.aux_logits:
+            self.aux = InceptionAux(768, num_classes)
+        self.inception7a = InceptionD(768)
+        self.inception7b = InceptionE(1280)
+        self.inception7c = InceptionE(2048)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 2048
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_preaux(self, x: Tensor) -> Tensor:
+        x = self.conv1a(x)
+        x = self.conv2a(x)
+        x = self.conv2b(x)
+        x = self.maxpool1(x)
+        x = self.conv3b(x)
+        x = self.conv4a(x)
+        x = self.maxpool2(x)
+        x = self.inception5b(x)
+        x = self.inception5c(x)
+        x = self.inception5d(x)
+        x = self.inception6a(x)
+        x = self.inception6b(x)
+        x = self.inception6c(x)
+        x = self.inception6d(x)
+        x = self.inception6e(x)
+        return x
+
+    def forward_postaux(self, x: Tensor) -> Tensor:
+        x = self.inception7a(x)
+        x = self.inception7b(x)
+        x = self.inception7c(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.forward_preaux(x)
+        x = self.forward_postaux(x)
+        return x
+
+    def construct(self, x: Tensor) -> Union[Tensor, Tuple[Tensor, Tensor]]:
+        x = self.forward_preaux(x)
+        if self.training and self.aux_logits:
+            aux = self.aux(x)
+        else:
+            aux = None
+        x = self.forward_postaux(x)
+
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+
+        if self.training and self.aux_logits:
+            return x, aux
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.inceptionv3.inception_v3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get InceptionV3 model. Refer to the base class models.InceptionV3 for more details.

+ +
+ Source code in mindcv/models/inceptionv3.py +
@register_model
+def inception_v3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV3:
+    """Get InceptionV3 model.
+    Refer to the base class `models.InceptionV3` for more details."""
+    default_cfg = default_cfgs["inception_v3"]
+    model = InceptionV3(num_classes=num_classes, aux_logits=True, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
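Since the factory hard-codes aux_logits=True, the network returns a (logits, aux) pair in training mode and a single tensor in eval mode. A sketch of both paths with dummy 299x299 inputs, as required by the note above; the shapes in the comments are expectations, not documented outputs.

import numpy as np
import mindspore as ms
from mindcv.models.inceptionv3 import inception_v3

net = inception_v3(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(2, 3, 299, 299), ms.float32)

net.set_train(True)
logits, aux = net(x)            # auxiliary head only runs in training mode
print(logits.shape, aux.shape)  # (2, 1000) (2, 1000)

net.set_train(False)
print(net(x).shape)             # (2, 1000); the aux branch is skipped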

inceptionv4

+ + +
+ + + +

+ mindcv.models.inceptionv4.InceptionV4 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Inception v4 model architecture from "Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>_.

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
drop_rate +
+

dropout rate of the layer before main classifier. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
+ +
+ Source code in mindcv/models/inceptionv4.py +
class InceptionV4(nn.Cell):
+    r"""Inception v4 model architecture from
+    `"Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning" <https://arxiv.org/abs/1602.07261>`_.  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.2,
+    ) -> None:
+        super().__init__()
+        blocks = [Stem(in_channels)]
+        for _ in range(4):
+            blocks.append(InceptionA())
+        blocks.append(ReductionA())
+        for _ in range(7):
+            blocks.append(InceptionB())
+        blocks.append(ReductionB())
+        for _ in range(3):
+            blocks.append(InceptionC())
+        self.features = nn.SequentialCell(blocks)
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.num_features = 1536
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.inceptionv4.inception_v4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get InceptionV4 model. Refer to the base class models.InceptionV4 for more details.

+ +
+ Source code in mindcv/models/inceptionv4.py +
@register_model
+def inception_v4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> InceptionV4:
+    """Get InceptionV4 model.
+    Refer to the base class `models.InceptionV4` for more details."""
+    default_cfg = default_cfgs["inception_v4"]
+    model = InceptionV4(num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
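The head is GlobalAvgPooling followed by Dense(1536, num_classes), so adapting the model to a smaller label set only requires a different num_classes. A fine-tuning-style sketch (data pipeline and training loop omitted; note that with pretrained=True, load_pretrained is also passed num_classes, as shown in the factory above).

from mindcv.models.inceptionv4 import inception_v4

net = inception_v4(pretrained=False, num_classes=10, in_channels=3)
print(net.num_features)  # 1536
print(net.classifier)    # Dense layer mapping 1536 features to 10 classes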

mixnet

+ + +
+ + + +

+ mindcv.models.mixnet.MixNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

MixNet model class, based on "MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
arch +
+

size of the architecture. "small", "medium" or "large". Default: "small".

+
+

+ + TYPE: + str + + + DEFAULT: + 'small' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of the channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
feature_size +
+

number of channels of the output features. Default: 1536.

+
+

+ + TYPE: + int + + + DEFAULT: + 1536 + +

+
drop_rate +
+

rate of dropout for classifier. Default: 0.2.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.2 + +

+
depth_multiplier +
+

expansion coefficient of channels. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
+ +
+ Source code in mindcv/models/mixnet.py +
class MixNet(nn.Cell):
+    r"""MixNet model class, based on
+    `"MixConv: Mixed Depthwise Convolutional Kernels" <https://arxiv.org/abs/1907.09595>`_
+
+    Args:
+        arch: size of the architecture. "small", "medium" or "large". Default: "small".
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of channels of the input. Default: 3.
+        feature_size: number of channels of the output features. Default: 1536.
+        drop_rate: rate of dropout for classifier. Default: 0.2.
+        depth_multiplier: expansion coefficient of channels. Default: 1.0.
+    """
+
+    def __init__(
+        self,
+        arch: str = "small",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        feature_size: int = 1536,
+        drop_rate: float = 0.2,
+        depth_multiplier: float = 1.0
+    ) -> None:
+        super(MixNet, self).__init__()
+        if arch == "small":
+            block_configs = [
+                [16, 16, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [16, 24, [3], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [24, 24, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [24, 40, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1, 1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5], [1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3, 5, 7], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9, 11], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            stem_channels = 16
+            drop_rate = drop_rate
+        else:
+            block_configs = [
+                [24, 24, [3], [1], [1], 1, 1, "ReLU", 0.0],
+                [24, 32, [3, 5, 7], [1, 1], [1, 1], 2, 6, "ReLU", 0.0],
+                [32, 32, [3], [1, 1], [1, 1], 1, 3, "ReLU", 0.0],
+                [32, 40, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 40, [3, 5], [1, 1], [1, 1], 1, 6, "Swish", 0.5],
+                [40, 80, [3, 5, 7], [1], [1], 2, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 80, [3, 5, 7, 9], [1, 1], [1, 1], 1, 6, "Swish", 0.25],
+                [80, 120, [3], [1], [1], 1, 6, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 120, [3, 5, 7, 9], [1, 1], [1, 1], 1, 3, "Swish", 0.5],
+                [120, 200, [3, 5, 7, 9], [1], [1], 2, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5],
+                [200, 200, [3, 5, 7, 9], [1], [1, 1], 1, 6, "Swish", 0.5]
+            ]
+            if arch == "medium":
+                stem_channels = 24
+                drop_rate = drop_rate
+            elif arch == "large":
+                stem_channels = 24
+                depth_multiplier *= 1.3
+                drop_rate = drop_rate
+            else:
+                raise ValueError(f"Unsupported model type {arch}")
+
+        if depth_multiplier != 1.0:
+            stem_channels = _roundchannels(stem_channels * depth_multiplier)
+
+            for i, conf in enumerate(block_configs):
+                conf_ls = list(conf)
+                conf_ls[0] = _roundchannels(conf_ls[0] * depth_multiplier)
+                conf_ls[1] = _roundchannels(conf_ls[1] * depth_multiplier)
+                block_configs[i] = tuple(conf_ls)
+
+        # stem convolution
+        self.stem_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, stem_channels, 3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(stem_channels),
+            nn.ReLU()
+        ])
+
+        # building MixNet blocks
+        layers = []
+        for inc, outc, k, ek, pk, s, er, ac, se in block_configs:
+            layers.append(MixNetBlock(
+                inc,
+                outc,
+                kernel_size=k,
+                expand_ksize=ek,
+                project_ksize=pk,
+                stride=s,
+                expand_ratio=er,
+                activation=ac,
+                se_ratio=se
+            ))
+        self.layers = nn.SequentialCell(layers)
+
+        # head
+        self.head_conv = nn.SequentialCell([
+            nn.Conv2d(block_configs[-1][1], feature_size, 1, pad_mode="pad", padding=0),
+            nn.BatchNorm2d(feature_size),
+            nn.ReLU()
+        ])
+
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(feature_size, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(math.sqrt(2.0 / fan_out)),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Uniform(1.0 / math.sqrt(cell.weight.shape[0])),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem_conv(x)
+        x = self.layers(x)
+        x = self.head_conv(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
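Each row of block_configs is unpacked in the build loop as (in_channels, out_channels, kernel_sizes, expand_ksize, project_ksize, stride, expand_ratio, activation, se_ratio). As a reading aid, the sketch below annotates one row copied from the small configuration; the variable names follow the loop above.

# [inc, outc, kernel_sizes, expand_ksize, project_ksize, stride, expand_ratio, activation, se_ratio]
row = [40, 80, [3, 5, 7], [1], [1, 1], 2, 6, "Swish", 0.25]
inc, outc, k, ek, pk, s, er, ac, se = row
# -> a stride-2 MixNetBlock taking 40 channels to 80, mixing 3x3/5x5/7x7 depthwise kernels,
#    with 1x1 expand/project convolutions, expansion ratio 6, Swish activation and SE ratio 0.25.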
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.mixnet.mixnet_l(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mixnet.py +
@register_model
+def mixnet_l(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_l"]
+    model = MixNet(arch="large", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mixnet.mixnet_m(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mixnet.py +
@register_model
+def mixnet_m(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_m"]
+    model = MixNet(arch="medium", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mixnet.mixnet_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mixnet.py +
@register_model
+def mixnet_s(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["mixnet_s"]
+    model = MixNet(arch="small", in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

mlpmixer

+ + +
+ + + +

+ mindcv.models.mlpmixer.MLPMixer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

MLP-Mixer model class, based on "MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
depth +
+

number of MixerBlocks.

+
+

+ + TYPE: + int) + +

+
patch_size +
+

size of a single image patch.

+
+

+ + TYPE: + int or tuple) + +

+
n_patches +
+

number of patches.

+
+

+ + TYPE: + int) + +

+
n_channels +
+

channels(dimension) of a single embedded patch.

+
+

+ + TYPE: + int) + +

+
token_dim +
+

hidden dim of token-mixing MLP.

+
+

+ + TYPE: + int) + +

+
channel_dim +
+

hidden dim of channel-mixing MLP.

+
+

+ + TYPE: + int) + +

+
num_classes +
+

number of classification classes.

+
+

+ + TYPE: + int) + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/mlpmixer.py +
class MLPMixer(nn.Cell):
+    r"""MLP-Mixer model class, based on
+    `"MLP-Mixer: An all-MLP Architecture for Vision" <https://arxiv.org/abs/2105.01601>`_
+
+    Args:
+        depth (int) : number of MixerBlocks.
+        patch_size (int or tuple) : size of a single image patch.
+        n_patches (int) : number of patches.
+        n_channels (int) : channels(dimension) of a single embedded patch.
+        token_dim (int) : hidden dim of token-mixing MLP.
+        channel_dim (int) : hidden dim of channel-mixing MLP.
+        num_classes (int) : number of classification classes.
+        in_channels: number of channels of the input. Default: 3.
+    """
+
+    def __init__(self, depth, patch_size, n_patches, n_channels, token_dim, channel_dim, num_classes=1000,
+                 in_channels=3):
+        super().__init__()
+        self.n_patches = n_patches
+        self.n_channels = n_channels
+        # patch with shape of (3, patch_size, patch_size) is embedded to n_channels dim feature.
+        self.to_patch_embedding = nn.SequentialCell(
+            nn.Conv2d(in_channels, n_channels, patch_size, patch_size, pad_mode="pad", padding=0),
+            TransPose(permutation=(0, 2, 1), embedding=True),
+        )
+        self.mixer_blocks = nn.SequentialCell()
+        for _ in range(depth):
+            self.mixer_blocks.append(MixerBlock(n_patches, n_channels, token_dim, channel_dim))
+        self.layer_norm = nn.LayerNorm((n_channels,))
+        self.mlp_head = nn.Dense(n_channels, num_classes)
+        self.mean = ops.ReduceMean()
+        self._initialize_weights()
+
+    def construct(self, x):
+        x = self.to_patch_embedding(x)
+        x = self.mixer_blocks(x)
+        x = self.layer_norm(x)
+        x = self.mean(x, 1)
+        return self.mlp_head(x)
+
+    def _initialize_weights(self):
+        # todo: implement weights init
+        pass
+
+
+ + + +
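The patch embedding is a Conv2d whose kernel size and stride both equal patch_size, so an image of side S is turned into (S / patch_size)^2 tokens of dimension n_channels; n_patches must match that count, which is presumably what _check_resolution_and_length_of_patch in the factories below verifies. A sketch assuming 224x224 inputs; the hyper-parameters mirror mlp_mixer_s_p32 further down.

import numpy as np
import mindspore as ms
from mindcv.models.mlpmixer import MLPMixer

image_size, patch_size = 224, 32
n_patches = (image_size // patch_size) ** 2   # 7 * 7 = 49 tokens

net = MLPMixer(depth=8, patch_size=patch_size, n_patches=n_patches,
               n_channels=512, token_dim=256, channel_dim=2048, num_classes=1000, in_channels=3)
x = ms.Tensor(np.random.randn(1, 3, image_size, image_size), ms.float32)
print(net(x).shape)  # expected (1, 1000)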
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_b_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_b_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 16, 196, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_b_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_b_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 12, 32, 49, 768, 384, 3072
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_b_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_h_p14(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_h_p14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 32, 14, 256, 1280, 640, 5120
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_h_p14"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_l_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_l_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 16, 196, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_l_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_l_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 24, 32, 49, 1024, 512, 4096
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_l_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_s_p16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_s_p16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    nl, pr, ls, hs, ds, dc = 8, 16, 196, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs, token_dim=ds, channel_dim=dc,
+                     num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p16"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.mlpmixer.mlp_mixer_s_p32(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/mlpmixer.py +
@register_model
+def mlp_mixer_s_p32(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    # number_of_layers, patch_resolution, length_of_sequence, hidden_size, mpl_dim_sequence, mpl_dim_channel
+    nl, pr, ls, hs, ds, dc = 8, 32, 49, 512, 256, 2048
+    _check_resolution_and_length_of_patch(pr, ls)
+    model = MLPMixer(depth=nl, patch_size=pr, n_patches=ls, n_channels=hs,
+                     token_dim=ds, channel_dim=dc, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    default_cfg = default_cfgs["mlp_mixer_s_p32"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
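
The registered MLP-Mixer factories above differ only in the (depth, patch size, sequence length, hidden size, token dim, channel dim) tuple they pass to MLPMixer. A minimal usage sketch, assuming MindSpore and MindCV are importable and a 224x224 input so that (224 / patch_size)^2 matches the factory's n_patches; pretrained=True would additionally call load_pretrained, which needs network access:

import numpy as np
import mindspore as ms
from mindcv.models.mlpmixer import mlp_mixer_s_p16

model = mlp_mixer_s_p16(pretrained=False, num_classes=1000, in_channels=3)
model.set_train(False)  # inference mode

# 224x224 with patch_size=16 gives (224 // 16) ** 2 = 196 patches, matching n_patches above.
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)  # Cell.__call__ dispatches to construct(); expected shape (1, 1000)
print(logits.shape)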

mnasnet

+ mindcv.models.mnasnet.Mnasnet
+
+ Bases: nn.Cell
+
+ MnasNet model architecture from
+ "MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>_.
+
+ PARAMETER    TYPE   DEFAULT  DESCRIPTION
+ alpha        float  -        scale factor of model width.
+ in_channels  int    3        number of channels of the input. Default: 3.
+ num_classes  int    1000     number of classification classes. Default: 1000.
+ drop_rate    float  0.2      dropout rate of the layer before the main classifier. Default: 0.2.
+ Source code in mindcv/models/mnasnet.py
class Mnasnet(nn.Cell):
+    r"""MnasNet model architecture from
+    `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" <https://arxiv.org/abs/1807.11626>`_.
+
+    Args:
+        alpha: scale factor of model width.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.2.
+    """
+
+    def __init__(
+        self,
+        alpha: float,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        drop_rate: float = 0.2,
+    ):
+        super().__init__()
+
+        inverted_residual_setting = [
+            # t, c, n, s, k
+            [3, 24, 3, 2, 3],  # -> 56x56
+            [3, 40, 3, 2, 5],  # -> 28x28
+            [6, 80, 3, 2, 5],  # -> 14x14
+            [6, 96, 2, 1, 3],  # -> 14x14
+            [6, 192, 4, 2, 5],  # -> 7x7
+            [6, 320, 1, 1, 3],  # -> 7x7
+        ]
+
+        mid_channels = make_divisible(32 * alpha, 8)
+        input_channels = make_divisible(16 * alpha, 8)
+
+        features: List[nn.Cell] = [
+            nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=1, pad_mode="pad", padding=1,
+                      group=mid_channels),
+            nn.BatchNorm2d(mid_channels, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+            nn.Conv2d(mid_channels, input_channels, kernel_size=1, stride=1),
+            nn.BatchNorm2d(input_channels, momentum=0.99, eps=1e-3),
+        ]
+
+        for t, c, n, s, k in inverted_residual_setting:
+            output_channels = make_divisible(c * alpha, 8)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channels,
+                                                 stride=stride, kernel_size=k, expand_ratio=t))
+                input_channels = output_channels
+
+        features.extend([
+            nn.Conv2d(input_channels, 1280, kernel_size=1, stride=1),
+            nn.BatchNorm2d(1280, momentum=0.99, eps=1e-3),
+            nn.ReLU(),
+        ])
+        self.features = nn.SequentialCell(features)
+        self.pool = GlobalAvgPooling()
+        self.dropout = Dropout(p=drop_rate)
+        self.classifier = nn.Dense(1280, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_out", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
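
construct above is just forward_features followed by forward_head, so the backbone output can be tapped before pooling. A small sketch, assuming the class is constructed directly (alpha has no default and must be supplied):

import numpy as np
import mindspore as ms
from mindcv.models.mnasnet import Mnasnet

net = Mnasnet(alpha=1.0, in_channels=3, num_classes=1000)
net.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
feats = net.forward_features(x)   # last conv feature map; (1, 1280, 7, 7) for a 224x224 input
logits = net.forward_head(feats)  # global average pool -> dropout -> Dense classifier
print(feats.shape, logits.shape)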
+
+mindcv.models.mnasnet.mnasnet_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MnasNet model with width scaled by 0.5.
+ Refer to the base class models.Mnasnet for more details.
+
+ Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.5.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_050"]
+    model = Mnasnet(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mnasnet.mnasnet_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MnasNet model with width scaled by 0.75.
+ Refer to the base class models.Mnasnet for more details.
+
+ Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 0.75.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_075"]
+    model = Mnasnet(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mnasnet.mnasnet_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MnasNet model with width scaled by 1.0.
+ Refer to the base class models.Mnasnet for more details.
+
+ Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.0.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_100"]
+    model = Mnasnet(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mnasnet.mnasnet_130(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MnasNet model with width scaled by 1.3.
+ Refer to the base class models.Mnasnet for more details.
+
+ Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_130(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.3.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_130"]
+    model = Mnasnet(alpha=1.3, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mnasnet.mnasnet_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MnasNet model with width scaled by 1.4.
+ Refer to the base class models.Mnasnet for more details.
+
+ Source code in mindcv/models/mnasnet.py
@register_model
+def mnasnet_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> Mnasnet:
+    """Get MnasNet model with width scaled by 1.4.
+    Refer to the base class `models.Mnasnet` for more details."""
+    default_cfg = default_cfgs["mnasnet_140"]
+    model = Mnasnet(alpha=1.4, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
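
The mnasnet_* factories above only change the width multiplier alpha (0.5 to 1.4). A quick sketch comparing parameter counts across widths (values are indicative; they depend on the make_divisible rounding inside the model):

import numpy as np
from mindcv.models.mnasnet import mnasnet_050, mnasnet_100, mnasnet_140

for factory in (mnasnet_050, mnasnet_100, mnasnet_140):
    net = factory(pretrained=False)
    n_params = sum(int(np.prod(p.shape)) for p in net.get_parameters())
    print(f"{factory.__name__}: {n_params / 1e6:.1f}M parameters")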

mobilenetv1

+ mindcv.models.mobilenetv1.MobileNetV1
+
+ Bases: nn.Cell
+
+ MobileNetV1 model class, based on
+ "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>_
+
+ PARAMETER    TYPE   DEFAULT  DESCRIPTION
+ alpha        float  1.0      scale factor of model width. Default: 1.
+ in_channels  int    3        number of channels of the input. Default: 3.
+ num_classes  int    1000     number of classification classes. Default: 1000.
+ Source code in mindcv/models/mobilenetv1.py
class MobileNetV1(nn.Cell):
+    r"""MobileNetV1 model class, based on
+    `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_  # noqa: E501
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = int(32 * alpha)
+        # Setting of depth-wise separable conv
+        # c: number of output channel
+        # s: stride of depth-wise conv
+        block_setting = [
+            # c, s
+            [64, 1],
+            [128, 2],
+            [128, 1],
+            [256, 2],
+            [256, 1],
+            [512, 2],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [512, 1],
+            [1024, 2],
+            [1024, 1],
+        ]
+
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU(),
+        ]
+        for c, s in block_setting:
+            output_channel = int(c * alpha)
+            features.append(depthwise_separable_conv(input_channels, output_channel, s))
+            input_channels = output_channel
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(input_channels, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.XavierUniform(), cell.weight.shape, cell.weight.dtype))
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(), cell.weight.shape, cell.weight.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
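
The feature stack above is assembled from depthwise_separable_conv blocks whose definition is not part of this listing. As an illustration only (the function name and exact layer order below are assumptions, not the MindCV source), such a block is typically a 3x3 depthwise convolution followed by a 1x1 pointwise convolution, each with BatchNorm and ReLU:

import mindspore.nn as nn

def depthwise_separable_conv_sketch(in_channels: int, out_channels: int, stride: int) -> nn.SequentialCell:
    """Illustrative depthwise separable conv: 3x3 depthwise + 1x1 pointwise."""
    return nn.SequentialCell([
        # Depthwise 3x3: one filter per input channel (group == in_channels).
        nn.Conv2d(in_channels, in_channels, 3, stride, pad_mode="pad", padding=1,
                  group=in_channels, has_bias=False),
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        # Pointwise 1x1: mixes channels and sets the output width.
        nn.Conv2d(in_channels, out_channels, 1, 1, has_bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    ])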
+
+mindcv.models.mobilenetv1.mobilenet_v1_025(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV1 model with width scaled by 0.25.
+ Refer to the base class models.MobileNetV1 for more details.
+
+ Source code in mindcv/models/mobilenetv1.py
@register_model
+def mobilenet_v1_025(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.25.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_025"]
+    model = MobileNetV1(alpha=0.25, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv1.mobilenet_v1_050(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV1 model with width scaled by 0.5.
+ Refer to the base class models.MobileNetV1 for more details.
+
+ Source code in mindcv/models/mobilenetv1.py
@register_model
+def mobilenet_v1_050(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.5.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_050"]
+    model = MobileNetV1(alpha=0.5, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv1.mobilenet_v1_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV1 model with width scaled by 0.75.
+ Refer to the base class models.MobileNetV1 for more details.
+
+ Source code in mindcv/models/mobilenetv1.py
@register_model
+def mobilenet_v1_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_075"]
+    model = MobileNetV1(alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv1.mobilenet_v1_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV1 model without width scaling.
+ Refer to the base class models.MobileNetV1 for more details.
+
+ Source code in mindcv/models/mobilenetv1.py
@register_model
+def mobilenet_v1_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV1:
+    """Get MobileNetV1 model without width scaling.
+    Refer to the base class `models.MobileNetV1` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v1_100"]
+    model = MobileNetV1(alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
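
The mobilenet_v1_* factories above share this signature and differ only in alpha. A short sketch, assuming MindSpore and MindCV are importable; pretrained=True would additionally download the checkpoint referenced by default_cfgs:

import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv1 import mobilenet_v1_025, mobilenet_v1_100

small = mobilenet_v1_025(pretrained=False)  # alpha = 0.25
full = mobilenet_v1_100(pretrained=False)   # alpha = 1.0
small.set_train(False)
full.set_train(False)

x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(small(x).shape, full(x).shape)  # (2, 1000) for both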

mobilenetv2

+ mindcv.models.mobilenetv2.MobileNetV2
+
+ Bases: nn.Cell
+
+ MobileNetV2 model class, based on
+ "MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>_
+
+ PARAMETER      TYPE   DEFAULT  DESCRIPTION
+ alpha          float  1.0      scale factor of model width. Default: 1.
+ round_nearest  int    8        divisor of the make divisible function. Default: 8.
+ in_channels    int    3        number of channels of the input. Default: 3.
+ num_classes    int    1000     number of classification classes. Default: 1000.
+ Source code in mindcv/models/mobilenetv2.py
class MobileNetV2(nn.Cell):
+    r"""MobileNetV2 model class, based on
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_
+
+    Args:
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(32 * alpha, round_nearest)
+        # Setting of inverted residual blocks.
+        # t: The expansion factor.
+        # c: Number of output channel.
+        # n: Number of block.
+        # s: First block stride.
+        inverted_residual_setting = [
+            # t, c, n, s
+            [1, 16, 1, 1],
+            [6, 24, 2, 2],
+            [6, 32, 3, 2],
+            [6, 64, 4, 2],
+            [6, 96, 3, 1],
+            [6, 160, 3, 2],
+            [6, 320, 1, 1],
+        ]
+        last_channels = make_divisible(1280 * max(1.0, alpha), round_nearest)
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.ReLU6(),
+        ]
+        # Building inverted residual blocks.
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = make_divisible(c * alpha, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(InvertedResidual(input_channels, output_channel, stride, expand_ratio=t))
+                input_channels = output_channel
+        # Building last point-wise layers.
+        features.extend([
+            nn.Conv2d(input_channels, last_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(last_channels),
+            nn.ReLU6(),
+        ])
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            Dropout(p=0.2),  # confirmed by paper authors
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
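
Every channel count above passes through make_divisible(value, round_nearest) before a layer is built. The helper itself is defined elsewhere in MindCV; the sketch below (a hypothetical name, and an assumption about the behaviour) shows the commonly used formulation, which rounds to the nearest multiple of the divisor while never dropping more than 10% below the requested value:

def make_divisible_sketch(v, divisor=8, min_value=None):
    """Assumed make_divisible behaviour: round to a multiple of divisor, capped at -10%."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never remove more than 10% of the requested channels
        new_v += divisor
    return new_v

# 32 * 0.35 = 11.2 -> 16 (the 10% rule bumps it back up); 24 * 0.35 = 8.4 -> 8.
print(make_divisible_sketch(32 * 0.35), make_divisible_sketch(24 * 0.35))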
+
+mindcv.models.mobilenetv2.mobilenet_v2_035_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_128"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_035_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_160"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_035_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_192"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_035_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_224"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_035_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_035_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.35 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_035_96"]
+    model = MobileNetV2(alpha=0.35, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_050_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.5 and input image size of 128.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_050_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_128"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_050_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.5 and input image size of 160.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_050_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_160"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_050_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.5 and input image size of 192.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_050_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_192"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_050_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.5 and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_050_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_224"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_050_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.5 and input image size of 96.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_050_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.5 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_050_96"]
+    model = MobileNetV2(alpha=0.5, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.75 and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_075_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.75 and input image size of 128.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_075_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_128"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_075_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.75 and input image size of 160.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_075_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_160"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_075_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.75 and input image size of 192.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_075_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_192"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_075_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 0.75 and input image size of 96.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_075_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 0.75 and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_075_96"]
+    model = MobileNetV2(alpha=0.75, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model without width scaling and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_100_128(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model without width scaling and input image size of 128.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_100_128(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 128.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_128"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_100_160(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model without width scaling and input image size of 160.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_100_160(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 160.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_160"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_100_192(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model without width scaling and input image size of 192.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_100_192(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 192.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_192"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_100_96(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model without width scaling and input image size of 96.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_100_96(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model without width scaling and input image size of 96.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_100_96"]
+    model = MobileNetV2(alpha=1.0, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_130_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 1.3 and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_130_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.3 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_130_224"]
+    model = MobileNetV2(alpha=1.3, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+mindcv.models.mobilenetv2.mobilenet_v2_140(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get MobileNetV2 model with width scaled by 1.4 and input image size of 224.
+ Refer to the base class models.MobileNetV2 for more details.
+
+ Source code in mindcv/models/mobilenetv2.py
@register_model
+def mobilenet_v2_140(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV2:
+    """Get MobileNetV2 model with width scaled by 1.4 and input image size of 224.
+    Refer to the base class `models.MobileNetV2` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v2_140"]
+    model = MobileNetV2(alpha=1.4, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
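
The mobilenet_v2_* factory names encode two things: the width multiplier (035 to 140) and the input resolution the corresponding pretrained weights were trained at (96 to 224). The constructed network depends only on alpha, and since classification happens after GlobalAvgPooling, other input sizes also run. A sketch:

import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv2 import mobilenet_v2_035_96, mobilenet_v2_140

tiny = mobilenet_v2_035_96(pretrained=False)  # alpha = 0.35, weights trained at 96x96
big = mobilenet_v2_140(pretrained=False)      # alpha = 1.4, weights trained at 224x224
tiny.set_train(False)
big.set_train(False)

print(tiny(ms.Tensor(np.random.randn(1, 3, 96, 96), ms.float32)).shape)   # (1, 1000)
print(big(ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)).shape)  # (1, 1000)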

mobilenetv3

+ mindcv.models.mobilenetv3.MobileNetV3
+
+ Bases: nn.Cell
+
+ MobileNetV3 model class, based on
+ "Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>_
+
+ PARAMETER      TYPE   DEFAULT  DESCRIPTION
+ arch           str    -        size of the architecture: 'small' or 'large'.
+ alpha          float  1.0      scale factor of model width. Default: 1.
+ round_nearest  int    8        divisor of the make divisible function. Default: 8.
+ in_channels    int    3        number of channels of the input. Default: 3.
+ num_classes    int    1000     number of classification classes. Default: 1000.
+ Source code in mindcv/models/mobilenetv3.py
class MobileNetV3(nn.Cell):
+    r"""MobileNetV3 model class, based on
+    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_
+
+    Args:
+        arch: size of the architecture. 'small' or 'large'.
+        alpha: scale factor of model width. Default: 1.
+        round_nearest: divisor of make divisible function. Default: 8.
+        in_channels: number the channels of the input. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        arch: str,
+        alpha: float = 1.0,
+        round_nearest: int = 8,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+    ) -> None:
+        super().__init__()
+        input_channels = make_divisible(16 * alpha, round_nearest)
+        # Setting of bottleneck blocks. ex: [k, e, c, se, nl, s]
+        # k: kernel size of depth-wise conv
+        # e: expansion size
+        # c: number of output channel
+        # se: whether there is a Squeeze-And-Excite in that block
+        # nl: type of non-linearity used
+        # s: stride of depth-wise conv
+        if arch == "large":
+            bottleneck_setting = [
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hswish", 2],
+                [3, 200, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 184, 80, False, "hswish", 1],
+                [3, 480, 112, True, "hswish", 1],
+                [3, 672, 112, True, "hswish", 1],
+                [5, 672, 160, True, "hswish", 2],
+                [5, 960, 160, True, "hswish", 1],
+                [5, 960, 160, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1280, round_nearest)
+        elif arch == "small":
+            bottleneck_setting = [
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hswish", 2],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 240, 40, True, "hswish", 1],
+                [5, 120, 48, True, "hswish", 1],
+                [5, 144, 48, True, "hswish", 1],
+                [5, 288, 96, True, "hswish", 2],
+                [5, 576, 96, True, "hswish", 1],
+                [5, 576, 96, True, "hswish", 1],
+            ]
+            last_channels = make_divisible(alpha * 1024, round_nearest)
+        else:
+            raise ValueError(f"Unsupported model type {arch}")
+
+        # Building stem conv layer.
+        features = [
+            nn.Conv2d(in_channels, input_channels, 3, 2, pad_mode="pad", padding=1, has_bias=False),
+            nn.BatchNorm2d(input_channels),
+            nn.HSwish(),
+        ]
+
+        total_reduction = 2
+        self.feature_info = [dict(chs=input_channels, reduction=total_reduction, name=f'features.{len(features) - 1}')]
+
+        # Building bottleneck blocks.
+        for k, e, c, se, nl, s in bottleneck_setting:
+            exp_channels = make_divisible(alpha * e, round_nearest)
+            output_channels = make_divisible(alpha * c, round_nearest)
+            features.append(Bottleneck(input_channels, exp_channels, output_channels,
+                                       kernel_size=k, stride=s, activation=nl, use_se=se))
+            input_channels = output_channels
+
+            total_reduction *= s
+            self.feature_info.append(dict(chs=input_channels, reduction=total_reduction,
+                                          name=f'features.{len(features) - 1}'))
+
+        # Building last point-wise conv layers.
+        output_channels = input_channels * 6
+        features.extend([
+            nn.Conv2d(input_channels, output_channels, 1, 1, pad_mode="pad", padding=0, has_bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.HSwish(),
+        ])
+
+        self.feature_info.append(dict(chs=output_channels, reduction=total_reduction,
+                                      name=f'features.{len(features) - 1}'))
+        self.flatten_sequential = True
+
+        self.features = nn.SequentialCell(features)
+
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(output_channels, last_channels),
+            nn.HSwish(),
+            Dropout(p=0.2),
+            nn.Dense(last_channels, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=math.sqrt(2. / n), mean=0.0),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(sigma=0.01, mean=0.0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
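
Besides the classifier head, the class records self.feature_info: one entry per tap point with the output channel count (chs), the cumulative stride (reduction) and the cell name. A sketch of inspecting it via direct construction:

from mindcv.models.mobilenetv3 import MobileNetV3

net = MobileNetV3(arch="small", alpha=1.0)
for info in net.feature_info:
    # each entry is a dict: {'chs': ..., 'reduction': ..., 'name': 'features.<i>'}
    print(info["name"], info["chs"], info["reduction"])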
+
+mindcv.models.mobilenetv3.mobilenet_v3_large_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get large MobileNetV3 model with width scaled by 0.75.
+ Refer to the base class models.MobileNetV3 for more details.
+
+ Source code in mindcv/models/mobilenetv3.py
@register_model
+def mobilenet_v3_large_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_075"]
+    model_args = dict(arch="large", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+mindcv.models.mobilenetv3.mobilenet_v3_large_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get large MobileNetV3 model without width scaling.
+ Refer to the base class models.MobileNetV3 for more details.
+
+ Source code in mindcv/models/mobilenetv3.py
@register_model
+def mobilenet_v3_large_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get large MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_large_100"]
+    model_args = dict(arch="large", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+mindcv.models.mobilenetv3.mobilenet_v3_small_075(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get small MobileNetV3 model with width scaled by 0.75.
+ Refer to the base class models.MobileNetV3 for more details.
+
+ Source code in mindcv/models/mobilenetv3.py
@register_model
+def mobilenet_v3_small_075(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model with width scaled by 0.75.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_075"]
+    model_args = dict(arch="small", alpha=0.75, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+mindcv.models.mobilenetv3.mobilenet_v3_small_100(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Get small MobileNetV3 model without width scaling.
+ Refer to the base class models.MobileNetV3 for more details.
+
+ Source code in mindcv/models/mobilenetv3.py
@register_model
+def mobilenet_v3_small_100(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileNetV3:
+    """Get small MobileNetV3 model without width scaling.
+    Refer to the base class `models.MobileNetV3` for more details.
+    """
+    default_cfg = default_cfgs["mobilenet_v3_small_100"]
+    model_args = dict(arch="small", alpha=1.0, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    return _create_mobilenet_v3(pretrained, **dict(default_cfg=default_cfg, **model_args))
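
The four factories above differ only in arch ('small' or 'large') and alpha, and delegate construction plus optional weight loading to _create_mobilenet_v3, which is defined elsewhere in mobilenetv3.py. A usage sketch, assuming MindSpore and MindCV are importable:

import numpy as np
import mindspore as ms
from mindcv.models.mobilenetv3 import mobilenet_v3_small_100, mobilenet_v3_large_100

small = mobilenet_v3_small_100(pretrained=False)
large = mobilenet_v3_large_100(pretrained=False)
small.set_train(False)
large.set_train(False)

x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
print(small(x).shape, large(x).shape)  # (1, 1000) for both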

mobilevit

+mindcv.models.mobilevit.mobilevit_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/mobilevit.py
@register_model
+def mobilevit_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+mindcv.models.mobilevit.mobilevit_x_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)
+
+ Source code in mindcv/models/mobilevit.py
@register_model
+def mobilevit_x_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("x_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_x_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model

mindcv.models.mobilevit.mobilevit_xx_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/mobilevit.py
@register_model
+def mobilevit_xx_small(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> MobileViT:
+    config = get_config("xx_small")
+    model = MobileViT(config, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["mobilevit_xx_small"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
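
A minimal inference sketch for the MobileViT factories (an illustration, not part of the generated reference): pretrained=True routes through load_pretrained and therefore needs network access to fetch a checkpoint, which is why this sketch keeps pretrained=False; the 256x256 input resolution is an assumption.

import numpy as np
import mindspore as ms
from mindspore import Tensor

from mindcv.models.mobilevit import mobilevit_xx_small

model = mobilevit_xx_small(pretrained=False, num_classes=1000, in_channels=3)
model.set_train(False)  # inference mode (disables dropout, etc.)

# dummy NCHW input; the 256x256 resolution used here is an assumption for this sketch
x = Tensor(np.random.randn(1, 3, 256, 256), ms.float32)
logits = model(x)
print(logits.shape)  # expected: (1, 1000)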

nasnet

mindcv.models.nasnet.NASNetAMobile

Bases: nn.Cell

NasNet model class, based on
"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>_

PARAMETER            DESCRIPTION
num_classes          number of classification classes. TYPE: int. DEFAULT: 1000
stem_filters         number of stem filters. TYPE: int. DEFAULT: 32
penultimate_filters  number of penultimate filters. TYPE: int. DEFAULT: 1056
filters_multiplier   size of filters multiplier. TYPE: int. DEFAULT: 2

Source code in mindcv/models/nasnet.py
class NASNetAMobile(nn.Cell):
+    r"""NasNet model class, based on
+    `"Learning Transferable Architectures for Scalable Image Recognition" <https://arxiv.org/pdf/1707.07012v4.pdf>`_
+    Args:
+        num_classes: number of classification classes.
+        stem_filters: number of stem filters. Default: 32.
+        penultimate_filters: number of penultimate filters. Default: 1056.
+        filters_multiplier: size of filters multiplier. Default: 2.
+    """
+
+    def __init__(
+        self,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        stem_filters: int = 32,
+        penultimate_filters: int = 1056,
+        filters_multiplier: int = 2,
+    ) -> None:
+        super().__init__()
+        self.stem_filters = stem_filters
+        self.penultimate_filters = penultimate_filters
+        self.filters_multiplier = filters_multiplier
+
+        filters = self.penultimate_filters // 24
+        # 24 is default value for the architecture
+
+        self.conv0 = nn.SequentialCell([
+            nn.Conv2d(in_channels=in_channels, out_channels=self.stem_filters, kernel_size=3, stride=2, pad_mode="pad",
+                      padding=0,
+                      has_bias=False),
+            nn.BatchNorm2d(num_features=self.stem_filters, eps=0.001, momentum=0.9, affine=True)
+        ])
+
+        self.cell_stem_0 = CellStem0(
+            self.stem_filters, num_filters=filters // (filters_multiplier ** 2)
+        )
+        self.cell_stem_1 = CellStem1(
+            self.stem_filters, num_filters=filters // filters_multiplier
+        )
+
+        self.cell_0 = FirstCell(
+            in_channels_left=filters,
+            out_channels_left=filters // 2,  # 1, 0.5
+            in_channels_right=2 * filters,
+            out_channels_right=filters,
+        )  # 2, 1
+        self.cell_1 = NormalCell(
+            in_channels_left=2 * filters,
+            out_channels_left=filters,  # 2, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_2 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+        self.cell_3 = NormalCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=6 * filters,
+            out_channels_right=filters,
+        )  # 6, 1
+
+        self.reduction_cell_0 = ReductionCell0(
+            in_channels_left=6 * filters,
+            out_channels_left=2 * filters,  # 6, 2
+            in_channels_right=6 * filters,
+            out_channels_right=2 * filters,
+        )  # 6, 2
+
+        self.cell_6 = FirstCell(
+            in_channels_left=6 * filters,
+            out_channels_left=filters,  # 6, 1
+            in_channels_right=8 * filters,
+            out_channels_right=2 * filters,
+        )  # 8, 2
+        self.cell_7 = NormalCell(
+            in_channels_left=8 * filters,
+            out_channels_left=2 * filters,  # 8, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_8 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+        self.cell_9 = NormalCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=12 * filters,
+            out_channels_right=2 * filters,
+        )  # 12, 2
+
+        self.reduction_cell_1 = ReductionCell1(
+            in_channels_left=12 * filters,
+            out_channels_left=4 * filters,  # 12, 4
+            in_channels_right=12 * filters,
+            out_channels_right=4 * filters,
+        )  # 12, 4
+
+        self.cell_12 = FirstCell(
+            in_channels_left=12 * filters,
+            out_channels_left=2 * filters,  # 12, 2
+            in_channels_right=16 * filters,
+            out_channels_right=4 * filters,
+        )  # 16, 4
+        self.cell_13 = NormalCell(
+            in_channels_left=16 * filters,
+            out_channels_left=4 * filters,  # 16, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_14 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+        self.cell_15 = NormalCell(
+            in_channels_left=24 * filters,
+            out_channels_left=4 * filters,  # 24, 4
+            in_channels_right=24 * filters,
+            out_channels_right=4 * filters,
+        )  # 24, 4
+
+        self.relu = nn.ReLU()
+        self.dropout = Dropout(p=0.5)
+        self.classifier = nn.Dense(in_channels=24 * filters, out_channels=num_classes)
+        self.pool = GlobalAvgPooling()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        self.init_parameters_data()
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                n = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                cell.weight.set_data(init.initializer(init.Normal(math.sqrt(2. / n), 0),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x_conv0 = self.conv0(x)
+        x_stem_0 = self.cell_stem_0(x_conv0)
+        x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+        x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+        x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+        x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+        x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+        x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+        x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+        x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+        x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+        x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+        x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+        x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+        x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+        x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+        x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+        x_cell_15 = self.relu(x_cell_15)
+        return x_cell_15
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)  # global average pool
+        x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
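
The class splits inference into forward_features (everything up to the final ReLU) and forward_head (global average pooling, dropout, classifier), so the penultimate feature map can be pulled out directly. A short sketch of that split, with an assumed 224x224 input:

import numpy as np
import mindspore as ms
from mindspore import Tensor

from mindcv.models.nasnet import NASNetAMobile

net = NASNetAMobile(in_channels=3, num_classes=1000)
net.set_train(False)

x = Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
feats = net.forward_features(x)   # 4D feature map before pooling
logits = net.forward_head(feats)  # pooled, dropout, classifier
print(feats.shape, logits.shape)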

mindcv.models.nasnet.NASNetAMobile.forward_features(x)

Network forward feature extraction.

Source code in mindcv/models/nasnet.py
def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x_conv0 = self.conv0(x)
+    x_stem_0 = self.cell_stem_0(x_conv0)
+    x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
+
+    x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
+    x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
+    x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
+    x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
+
+    x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
+
+    x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
+    x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
+    x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
+    x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
+
+    x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
+
+    x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
+    x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
+    x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
+    x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
+
+    x_cell_15 = self.relu(x_cell_15)
+    return x_cell_15

mindcv.models.nasnet.nasnet_a_4x1056(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get NasNet model.
Refer to the base class models.NASNetAMobile for more details.

Source code in mindcv/models/nasnet.py
@register_model
+def nasnet_a_4x1056(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> NASNetAMobile:
+    """Get NasNet model.
+    Refer to the base class `models.NASNetAMobile` for more details."""
+    default_cfg = default_cfgs["nasnet_a_4x1056"]
+    model = NASNetAMobile(in_channels=in_channels, num_classes=num_classes, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
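
A creation sketch for the factory above (illustrative): num_classes only resizes the final Dense classifier, and when pretrained=True the load_pretrained call adapts the checkpoint to the requested num_classes/in_channels. The 10-class target task is an assumption.

from mindcv.models.nasnet import nasnet_a_4x1056

# 10-class head for a downstream dataset; pretrained=False avoids a checkpoint download in this sketch
model = nasnet_a_4x1056(pretrained=False, num_classes=10, in_channels=3)
print(model.classifier)  # Dense layer with out_channels=10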

pit

mindcv.models.pit.pit_b(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PiT-B model.
Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model
+def pit_b(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-B model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_b"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=14,
+        stride=7,
+        base_dims=[64, 64, 64],
+        depth=[3, 6, 4],
+        heads=[4, 8, 16],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pit.pit_s(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PiT-S model.
Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model
+def pit_s(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-S model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_s"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[3, 6, 12],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pit.pit_ti(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PiT-Ti model.
Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model
+def pit_ti(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-Ti model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_ti"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[32, 32, 32],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pit.pit_xs(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PiT-XS model.
Refer to the base class models.PoolingTransformer for more details.

Source code in mindcv/models/pit.py
@register_model
+def pit_xs(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolingTransformer:
+    """Get PiT-XS model.
+    Refer to the base class `models.PoolingTransformer` for more details."""
+    default_cfg = default_cfgs["pit_xs"]
+    model = PoolingTransformer(
+        image_size=224,
+        patch_size=16,
+        stride=8,
+        base_dims=[48, 48, 48],
+        depth=[2, 6, 4],
+        heads=[2, 4, 8],
+        mlp_ratio=4.0,
+        num_classes=num_classes,
+        in_chans=in_channels,
+        **kwargs
+    )
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
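
The four PiT factories above differ only in base_dims, depth, and heads; all of them forward in_channels as in_chans to PoolingTransformer, so single-channel input is a one-argument change. A minimal sketch assuming grayscale 224x224 images:

import numpy as np
import mindspore as ms
from mindspore import Tensor

from mindcv.models.pit import pit_ti

# in_channels=1 is passed through as in_chans to PoolingTransformer
model = pit_ti(pretrained=False, num_classes=1000, in_channels=1)
model.set_train(False)

x = Tensor(np.random.randn(1, 1, 224, 224), ms.float32)
print(model(x).shape)  # expected: (1, 1000)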

poolformer

mindcv.models.poolformer.PoolFormer

Bases: nn.Cell

PoolFormer model class, based on
"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>_

PARAMETER               DESCRIPTION
layers                  number of blocks for the 4 stages
embed_dims              the embedding dims for the 4 stages. DEFAULT: (64, 128, 320, 512)
mlp_ratios              mlp ratios for the 4 stages. DEFAULT: (4, 4, 4, 4)
downsamples             flags to apply downsampling or not. DEFAULT: (True, True, True, True)
pool_size               the pooling size for the 4 stages. DEFAULT: 3
in_chans                number of input channels. DEFAULT: 3
num_classes             number of classes for the image classification. DEFAULT: 1000
global_pool             define the types of pooling layer. DEFAULT: 'avg'
norm_layer              define the types of normalization. DEFAULT: nn.GroupNorm
act_layer               define the types of activation. DEFAULT: nn.GELU
in_patch_size           specify the patch embedding for the input image. DEFAULT: 7
in_stride               specify the stride for the input image. DEFAULT: 4
in_pad                  specify the pad for the input image. DEFAULT: 2
down_patch_size         specify the downsample. DEFAULT: 3
down_stride             specify the downsample (patch embed.). DEFAULT: 2
down_pad                specify the downsample (patch embed.). DEFAULT: 1
drop_rate               dropout rate of the layer before main classifier. DEFAULT: 0.0
drop_path_rate          Stochastic Depth. DEFAULT: 0.0
layer_scale_init_value  LayerScale. DEFAULT: 1e-05
fork_feat               whether output features of the 4 stages, for dense prediction. DEFAULT: False

Source code in mindcv/models/poolformer.py
class PoolFormer(nn.Cell):
+    r"""PoolFormer model class, based on
+    `"MetaFormer Is Actually What You Need for Vision" <https://arxiv.org/pdf/2111.11418v3.pdf>`_
+
+    Args:
+        layers: number of blocks for the 4 stages
+        embed_dims: the embedding dims for the 4 stages. Default: (64, 128, 320, 512)
+        mlp_ratios: mlp ratios for the 4 stages. Default: (4, 4, 4, 4)
+        downsamples: flags to apply downsampling or not. Default: (True, True, True, True)
+        pool_size: the pooling size for the 4 stages. Default: 3
+        in_chans: number of input channels. Default: 3
+        num_classes: number of classes for the image classification. Default: 1000
+        global_pool: define the types of pooling layer. Default: avg
+        norm_layer: define the types of normalization. Default: nn.GroupNorm
+        act_layer: define the types of activation. Default: nn.GELU
+        in_patch_size: specify the patch embedding for the input image. Default: 7
+        in_stride: specify the stride for the input image. Default: 4.
+        in_pad: specify the pad for the input image. Default: 2.
+        down_patch_size: specify the downsample. Default: 3.
+        down_stride: specify the downsample (patch embed.). Default: 2.
+        down_pad: specify the downsample (patch embed.). Default: 1.
+        drop_rate: dropout rate of the layer before main classifier. Default: 0.
+        drop_path_rate: Stochastic Depth. Default: 0.
+        layer_scale_init_value: LayerScale. Default: 1e-5.
+        fork_feat: whether output features of the 4 stages, for dense prediction. Default: False.
+    """
+
+    def __init__(
+        self,
+        layers,
+        embed_dims=(64, 128, 320, 512),
+        mlp_ratios=(4, 4, 4, 4),
+        downsamples=(True, True, True, True),
+        pool_size=3,
+        in_chans=3,
+        num_classes=1000,
+        global_pool="avg",
+        norm_layer=nn.GroupNorm,
+        act_layer=nn.GELU,
+        in_patch_size=7,
+        in_stride=4,
+        in_pad=2,
+        down_patch_size=3,
+        down_stride=2,
+        down_pad=1,
+        drop_rate=0.0,
+        drop_path_rate=0.0,
+        layer_scale_init_value=1e-5,
+        fork_feat=False,
+    ):
+        super().__init__()
+
+        if not fork_feat:
+            self.num_classes = num_classes
+        self.fork_feat = fork_feat
+
+        self.global_pool = global_pool
+        self.num_features = embed_dims[-1]
+        self.grad_checkpointing = False
+
+        self.patch_embed = PatchEmbed(
+            patch_size=in_patch_size, stride=in_stride, padding=in_pad,
+            in_chs=in_chans, embed_dim=embed_dims[0])
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            network.append(basic_blocks(
+                embed_dims[i], i, layers,
+                pool_size=pool_size, mlp_ratio=mlp_ratios[i],
+                act_layer=act_layer, norm_layer=norm_layer,
+                drop_rate=drop_rate, drop_path_rate=drop_path_rate,
+                layer_scale_init_value=layer_scale_init_value)
+            )
+            if i < len(layers) - 1 and (downsamples[i] or embed_dims[i] != embed_dims[i + 1]):
+                # downsampling between stages
+                network.append(PatchEmbed(
+                    in_chs=embed_dims[i], embed_dim=embed_dims[i + 1],
+                    patch_size=down_patch_size, stride=down_stride, padding=down_pad)
+                )
+
+        self.network = nn.SequentialCell(*network)
+        self.norm = norm_layer(1, embed_dims[-1])
+        self.head = nn.Dense(embed_dims[-1], num_classes, has_bias=True) if num_classes > 0 else Identity()
+        # self._initialize_weights()
+        self.cls_init_weights()
+
+    def cls_init_weights(self):
+        """Initialize weights for cells."""
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+                if m.bias is not None:
+                    m.bias.set_data(
+                        init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))
+
+    def reset_classifier(self, num_classes, global_pool=None):
+        self.num_classes = num_classes
+        if global_pool is not None:
+            self.global_pool = global_pool
+        self.head = nn.Dense(self.num_features, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.network(x)
+        if self.fork_feat:
+            # output features of four stages for dense prediction
+            return x
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x.mean([-2, -1]))
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
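
reset_classifier (documented in the listing above) swaps the Dense head, or replaces it with Identity when num_classes=0, which is the usual hook for transfer learning. A short sketch, assuming a 100-class target task:

from mindcv.models.poolformer import poolformer_s12

model = poolformer_s12(pretrained=False, num_classes=1000)

# keep the backbone, replace only the classification head
model.reset_classifier(num_classes=100)
print(model.head)  # Dense head with out_channels=100 (in_channels = embed_dims[-1] = 512 by default)

# num_classes=0 removes the head entirely (Identity), leaving pooled features
model.reset_classifier(num_classes=0)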

mindcv.models.poolformer.PoolFormer.cls_init_weights()

Initialize weights for cells.

Source code in mindcv/models/poolformer.py
def cls_init_weights(self):
+    """Initialize weights for cells."""
+    for name, m in self.cells_and_names():
+        if isinstance(m, nn.Dense):
+            m.weight.set_data(
+                init.initializer(init.TruncatedNormal(sigma=.02), m.weight.shape, m.weight.dtype))
+            if m.bias is not None:
+                m.bias.set_data(
+                    init.initializer(init.Constant(0), m.bias.shape, m.bias.dtype))

mindcv.models.poolformer.poolformer_m36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get poolformer_m36 model.
Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_m36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m36"]
+    layers = (6, 6, 18, 6)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model

mindcv.models.poolformer.poolformer_m48(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get poolformer_m48 model.
Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_m48(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_m48 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_m48"]
+    layers = (8, 8, 24, 8)
+    embed_dims = (96, 192, 384, 768)
+    model = PoolFormer(
+        in_chans=in_channels,
+        num_classes=num_classes,
+        layers=layers,
+        layer_scale_init_value=1e-6,
+        embed_dims=embed_dims,
+        **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model

mindcv.models.poolformer.poolformer_s12(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get poolformer_s12 model.
Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s12(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s12 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s12"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(2, 2, 6, 2), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model

mindcv.models.poolformer.poolformer_s24(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get poolformer_s24 model.
Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s24(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s24 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s24"]
+    model = PoolFormer(in_chans=in_channels, num_classes=num_classes, layers=(4, 4, 12, 4), **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model

mindcv.models.poolformer.poolformer_s36(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get poolformer_s36 model.
Refer to the base class models.PoolFormer for more details.

Source code in mindcv/models/poolformer.py
@register_model
+def poolformer_s36(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs) -> PoolFormer:
+    """Get poolformer_s36 model.
+    Refer to the base class `models.PoolFormer` for more details."""
+    default_cfg = default_cfgs["poolformer_s36"]
+    model = PoolFormer(
+        in_chans=in_channels, num_classes=num_classes, layers=(6, 6, 18, 6), layer_scale_init_value=1e-6, **kwargs
+    )
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
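
A comparison sketch for the registered PoolFormer variants (illustrative): the factories above change only layers, and the m-series additionally widens embed_dims to (96, 192, 384, 768) and sets layer_scale_init_value=1e-6; the parameter-count helper assumes MindSpore's Parameter.size attribute.

from mindcv.models.poolformer import poolformer_m36, poolformer_s12

# blocks per stage, taken from the factory listings above
configs = {
    "poolformer_s12": (2, 2, 6, 2),
    "poolformer_s24": (4, 4, 12, 4),
    "poolformer_s36": (6, 6, 18, 6),
    "poolformer_m36": (6, 6, 18, 6),   # + embed_dims=(96, 192, 384, 768)
    "poolformer_m48": (8, 8, 24, 8),   # + embed_dims=(96, 192, 384, 768)
}

small = poolformer_s12(pretrained=False, num_classes=1000)
medium = poolformer_m36(pretrained=False, num_classes=1000)
print(sum(p.size for p in small.trainable_params()),
      sum(p.size for p in medium.trainable_params()))  # rough trainable parameter counts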

pvt

mindcv.models.pvt.PyramidVisionTransformer

Bases: nn.Cell

Pyramid Vision Transformer model class, based on
"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>_

PARAMETER        DESCRIPTION
img_size         size of an input image. TYPE: int
patch_size       size of a single image patch. TYPE: int. DEFAULT: 4
in_chans         number of channels of the input. TYPE: int. DEFAULT: 3
num_classes      number of classification classes. TYPE: int. DEFAULT: 1000
embed_dims       hidden dims of each PatchEmbed. TYPE: list. DEFAULT: [64, 128, 320, 512]
num_heads        number of attention heads in each stage. TYPE: list. DEFAULT: [1, 2, 5, 8]
mlp_ratios       ratios of MLP hidden dims in each stage. TYPE: list. DEFAULT: [8, 8, 4, 4]
qkv_bias         use bias in attention. TYPE: bool
qk_scale         scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5. TYPE: float
drop_rate        the drop rate for each block. TYPE: float. DEFAULT: 0.0
attn_drop_rate   the drop rate for attention. TYPE: float. DEFAULT: 0.0
drop_path_rate   the drop rate for drop path. TYPE: float. DEFAULT: 0.0
norm_layer       norm layer that will be used in blocks. TYPE: nn.Cell. DEFAULT: nn.LayerNorm
depths           number of Blocks in each stage. TYPE: list. DEFAULT: [2, 2, 2, 2]
sr_ratios        stride and kernel size of each attention. TYPE: list
num_stages       number of stages. TYPE: int. DEFAULT: 4

Source code in mindcv/models/pvt.py
class PyramidVisionTransformer(nn.Cell):
+    r"""Pyramid Vision Transformer model class, based on
+    `"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions" <https://arxiv.org/abs/2102.12122>`_  # noqa: E501
+
+    Args:
+        img_size(int) : size of a input image.
+        patch_size (int) : size of a single image patch.
+        in_chans (int) : number the channels of the input. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dims (list) : how many hidden dim in each PatchEmbed.
+        num_heads (list) : number of attention head in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias(bool) : use bias in attention.
+        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.
+        drop_rate(float) : The drop rate for each block. Default: 0.0.
+        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.
+        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.
+        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list) : number of Blocks.
+        sr_ratios(list) : stride and kernel size of each attention.
+        num_stages(int) : number of stage. Default: 4.
+    """
+
+    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 320, 512],
+                 num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True, qk_scale=None, drop_rate=0.0,
+                 attn_drop_rate=0.0, drop_path_rate=0.0, norm_layer=nn.LayerNorm,
+                 depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], num_stages=4):
+        super(PyramidVisionTransformer, self).__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        b_list = []
+        self.pos_embed = []
+        self.pos_drop = Dropout(p=drop_rate)
+        for i in range(num_stages):
+            block = nn.CellList(
+                [Block(dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                       qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j],
+                       norm_layer=norm_layer, sr_ratio=sr_ratios[i])
+                 for j in range(depths[i])
+                 ])
+
+            b_list.append(block)
+            cur += depths[0]
+
+        self.patch_embed1 = PatchEmbed(img_size=img_size,
+                                       patch_size=patch_size,
+                                       in_chans=in_chans,
+                                       embed_dim=embed_dims[0])
+        num_patches = self.patch_embed1.num_patches
+        self.pos_embed1 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[0]), mindspore.float16))
+        self.pos_drop1 = Dropout(p=drop_rate)
+
+        self.patch_embed2 = PatchEmbed(img_size=img_size // (2 ** (1 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[1 - 1],
+                                       embed_dim=embed_dims[1])
+        num_patches = self.patch_embed2.num_patches
+        self.pos_embed2 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[1]), mindspore.float16))
+        self.pos_drop2 = Dropout(p=drop_rate)
+
+        self.patch_embed3 = PatchEmbed(img_size=img_size // (2 ** (2 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[2 - 1],
+                                       embed_dim=embed_dims[2])
+        num_patches = self.patch_embed3.num_patches
+        self.pos_embed3 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[2]), mindspore.float16))
+        self.pos_drop3 = Dropout(p=drop_rate)
+
+        self.patch_embed4 = PatchEmbed(img_size // (2 ** (3 + 1)),
+                                       patch_size=2,
+                                       in_chans=embed_dims[3 - 1],
+                                       embed_dim=embed_dims[3])
+        num_patches = self.patch_embed4.num_patches + 1
+        self.pos_embed4 = mindspore.Parameter(ops.zeros((1, num_patches, embed_dims[3]), mindspore.float16))
+        self.pos_drop4 = Dropout(p=drop_rate)
+        self.Blocks = nn.CellList(b_list)
+
+        self.norm = norm_layer([embed_dims[3]])
+
+        # cls_token
+        self.cls_token = mindspore.Parameter(ops.zeros((1, 1, embed_dims[3]), mindspore.float32))
+
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self.reshape = ops.reshape
+        self.transpose = ops.transpose
+        self.tile = ops.Tile()
+        self.Concat = ops.Concat(axis=1)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def _get_pos_embed(self, pos_embed, ph, pw, H, W):
+        if H * W == self.patch_embed1.num_patches:
+            return pos_embed
+        else:
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, ph, pw, -1)), (0, 3, 1, 2))
+            resize_bilinear = ops.ResizeBilinear((H, W))
+            pos_embed = resize_bilinear(pos_embed)
+
+            pos_embed = self.transpose(self.reshape(pos_embed, (1, -1, H * W)), (0, 2, 1))
+
+            return pos_embed
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        x, (H, W) = self.patch_embed1(x)
+        pos_embed = self.pos_embed1
+        x = self.pos_drop1(x + pos_embed)
+        for blk in self.Blocks[0]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed2(x)
+        ph, pw = self.patch_embed2.H, self.patch_embed2.W
+        pos_embed = self._get_pos_embed(self.pos_embed2, ph, pw, H, W)
+        x = self.pos_drop2(x + pos_embed)
+        for blk in self.Blocks[1]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed3(x)
+        ph, pw = self.patch_embed3.H, self.patch_embed3.W
+        pos_embed = self._get_pos_embed(self.pos_embed3, ph, pw, H, W)
+        x = self.pos_drop3(x + pos_embed)
+        for blk in self.Blocks[2]:
+            x = blk(x, H, W)
+        x = self.transpose(self.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        x, (H, W) = self.patch_embed4(x)
+        cls_tokens = self.tile(self.cls_token, (B, 1, 1))
+
+        x = self.Concat((cls_tokens, x))
+        ph, pw = self.patch_embed4.H, self.patch_embed4.W
+        pos_embed_ = self._get_pos_embed(self.pos_embed4[:, 1:], ph, pw, H, W)
+        pos_embed = self.Concat((self.pos_embed4[:, 0:1], pos_embed_))
+        x = self.pos_drop4(x + pos_embed)
+        for blk in self.Blocks[3]:
+            x = blk(x, H, W)
+
+        x = self.norm(x)
+
+        return x[:, 0]
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x
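
The constructor arguments documented above can also be used directly, without the registered factories. A sketch that mirrors the pvt_tiny configuration from the factory listing below; the argument values are illustrative, not an additional official preset.

from functools import partial

import mindspore.nn as nn

from mindcv.models.pvt import PyramidVisionTransformer

# illustrative configuration, taken from the pvt_tiny factory below
model = PyramidVisionTransformer(
    img_size=224,
    patch_size=4,
    in_chans=3,
    num_classes=1000,
    embed_dims=[64, 128, 320, 512],
    num_heads=[1, 2, 5, 8],
    mlp_ratios=[8, 8, 4, 4],
    qkv_bias=True,
    norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
    depths=[2, 2, 2, 2],
    sr_ratios=[8, 4, 2, 1],
)
print(type(model).__name__)  # PyramidVisionTransformer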

mindcv.models.pvt.pvt_large(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PVT large model.
Refer to the base class "models.PVT" for more details.

Source code in mindcv/models/pvt.py
@register_model
+def pvt_large(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT large model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_large']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pvt.pvt_medium(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PVT medium model.
Refer to the base class "models.PVT" for more details.

Source code in mindcv/models/pvt.py
@register_model
+def pvt_medium(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT medium model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_medium']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pvt.pvt_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PVT small model.
Refer to the base class "models.PVT" for more details.

Source code in mindcv/models/pvt.py
@register_model
+def pvt_small(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT small model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_small']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.pvt.pvt_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PVT tiny model.
Refer to the base class "models.PVT" for more details.

Source code in mindcv/models/pvt.py
@register_model
+def pvt_tiny(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformer:
+    """Get PVT tiny model
+    Refer to the base class "models.PVT" for more details.
+    """
+    default_cfg = default_cfgs['pvt_tiny']
+    model = PyramidVisionTransformer(in_chans=in_channels, num_classes=num_classes,
+                                     patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8],
+                                     mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+                                     norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2],
+                                     sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
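
The four PVT factories above share every argument except depths, which sets how many Blocks each of the four stages stacks. A small sketch summarizing the registered depth settings (taken from the listings above) and building the smallest variant:

from mindcv.models.pvt import pvt_tiny

# blocks per stage for the registered PVT variants
pvt_depths = {
    "pvt_tiny": [2, 2, 2, 2],
    "pvt_small": [3, 4, 6, 3],
    "pvt_medium": [3, 4, 18, 3],
    "pvt_large": [3, 8, 27, 3],
}

model = pvt_tiny(pretrained=False, num_classes=1000, in_channels=3)
print(pvt_depths["pvt_tiny"], type(model).__name__)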

pvtv2

mindcv.models.pvtv2.PyramidVisionTransformerV2

Bases: nn.Cell

Pyramid Vision Transformer V2 model class, based on
"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>_

PARAMETER        DESCRIPTION
img_size         size of an input image. TYPE: int
patch_size       size of a single image patch. TYPE: int. DEFAULT: 16
in_chans         number of channels of the input. TYPE: int. DEFAULT: 3
num_classes      number of classification classes. TYPE: int. DEFAULT: 1000
embed_dims       hidden dims of each PatchEmbed. TYPE: list. DEFAULT: [64, 128, 256, 512]
num_heads        number of attention heads in each stage. TYPE: list. DEFAULT: [1, 2, 4, 8]
mlp_ratios       ratios of MLP hidden dims in each stage. TYPE: list. DEFAULT: [4, 4, 4, 4]
qkv_bias         use bias in attention. TYPE: bool
qk_scale         scale multiplied by qk in attention (if not None), otherwise head_dim ** -0.5. TYPE: float
drop_rate        the drop rate for each block. TYPE: float. DEFAULT: 0.0
attn_drop_rate   the drop rate for attention. TYPE: float. DEFAULT: 0.0
drop_path_rate   the drop rate for drop path. TYPE: float. DEFAULT: 0.0
norm_layer       norm layer that will be used in blocks. TYPE: nn.Cell. DEFAULT: nn.LayerNorm
depths           number of Blocks in each stage. TYPE: list. DEFAULT: [3, 4, 6, 3]
sr_ratios        stride and kernel size of each attention. TYPE: list
num_stages       number of stages. TYPE: int. DEFAULT: 4
linear           use linear SRA. TYPE: bool

Source code in mindcv/models/pvtv2.py
class PyramidVisionTransformerV2(nn.Cell):
+    r"""Pyramid Vision Transformer V2 model class, based on
+    `"PVTv2: Improved Baselines with Pyramid Vision Transformer" <https://arxiv.org/abs/2106.13797>`_
+
+    Args:
+        img_size(int) : size of a input image.
+        patch_size (int) : size of a single image patch.
+        in_chans (int) : number the channels of the input. Default: 3.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dims (list) : how many hidden dim in each PatchEmbed.
+        num_heads (list) : number of attention head in each stage.
+        mlp_ratios (list): ratios of MLP hidden dims in each stage.
+        qkv_bias(bool) : use bias in attention.
+        qk_scale(float) : Scale multiplied by qk in attention(if not none), otherwise head_dim ** -0.5.
+        drop_rate(float) : The drop rate for each block. Default: 0.0.
+        attn_drop_rate(float) : The drop rate for attention. Default: 0.0.
+        drop_path_rate(float) : The drop rate for drop path. Default: 0.0.
+        norm_layer(nn.Cell) : Norm layer that will be used in blocks. Default: nn.LayerNorm.
+        depths (list) : number of Blocks.
+        sr_ratios(list) : stride and kernel size of each attention.
+        num_stages(int) : number of stage. Default: 4.
+        linear(bool) :  use linear SRA.
+    """
+
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4, linear=False):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        self.num_stages = num_stages
+
+        start = Tensor(0, mindspore.float32)
+        stop = Tensor(drop_path_rate, mindspore.float32)
+        dpr = [float(x) for x in ops.linspace(start, stop, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+
+        patch_embed_list = []
+        block_list = []
+        norm_list = []
+
+        for i in range(num_stages):
+            patch_embed = OverlapPatchEmbed(img_size=img_size if i == 0 else img_size // (2 ** (i + 1)),
+                                            patch_size=7 if i == 0 else 3,
+                                            stride=4 if i == 0 else 2,
+                                            in_chans=in_chans if i == 0 else embed_dims[i - 1],
+                                            embed_dim=embed_dims[i])
+
+            block = nn.CellList([Block(
+                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
+                qk_scale=qk_scale,
+                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,
+                sr_ratio=sr_ratios[i], linear=linear, block_id=j)
+                for j in range(depths[i])])
+
+            norm = norm_layer([embed_dims[i]])
+
+            cur += depths[i]
+
+            patch_embed_list.append(patch_embed)
+            block_list.append(block)
+            norm_list.append(norm)
+        self.patch_embed_list = nn.CellList(patch_embed_list)
+        self.block_list = nn.CellList(block_list)
+        self.norm_list = nn.CellList(norm_list)
+        # classification head
+        self.head = nn.Dense(embed_dims[3], num_classes) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def freeze_patch_emb(self):
+        self.patch_embed_list[0].requires_grad = False
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if isinstance(cell, nn.Dense) and cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(weight_init.initializer(weight_init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(weight_init.initializer(weight_init.Zero(), cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                fan_out = cell.kernel_size[0] * cell.kernel_size[1] * cell.out_channels
+                fan_out //= cell.group
+                cell.weight.set_data(weight_init.initializer(weight_init.Normal(sigma=math.sqrt(2.0 / fan_out)),
+                                                             cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(weight_init.initializer(weight_init.Zero(), cell.bias.shape, cell.bias.dtype))
+
+    def get_classifier(self):
+        return self.head
+
+    def reset_classifier(self, num_classes, global_pool=""):
+        self.num_classes = num_classes
+        self.head = nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
+
+    def forward_features(self, x):
+        B = x.shape[0]
+
+        for i in range(self.num_stages):
+            patch_embed = self.patch_embed_list[i]
+            block = self.block_list[i]
+            norm = self.norm_list[i]
+            x, H, W = patch_embed(x)
+            for blk in block:
+                x = blk(x, H, W)
+            x = norm(x)
+            if i != self.num_stages - 1:
+                x = ops.transpose(ops.reshape(x, (B, H, W, -1)), (0, 3, 1, 2))
+
+        return x.mean(axis=1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+
+        return x

mindcv.models.pvtv2.pvt_v2_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get PVTV2-b0 model.
Refer to the base class "models.PVTv2" for more details.

Source code in mindcv/models/pvtv2.py
@register_model
+def pvt_v2_b0(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b0 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b0"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
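As a usage sketch (illustrative; it assumes mindcv exposes create_model at the top level, as in its quick-start examples, and that the registered name matches the function name):

import mindcv

# equivalent to calling mindcv.models.pvtv2.pvt_v2_b0 directly; pass pretrained=True
# to load the checkpoint referenced in default_cfgs, if one is published
model = mindcv.create_model("pvt_v2_b0", pretrained=False, num_classes=1000)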
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PVTV2-b1 model. Refer to the base class "models.PVTv2" for more details.

+ +
+ Source code in mindcv/models/pvtv2.py +
@register_model
+def pvt_v2_b1(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b1 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b1"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PVTV2-b2 model. Refer to the base class "models.PVTv2" for more details.

+ +
+ Source code in mindcv/models/pvtv2.py +
@register_model
+def pvt_v2_b2(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b2 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b2"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PVTV2-b3 model. Refer to the base class "models.PVTv2" for more details.

+ +
+ Source code in mindcv/models/pvtv2.py +
@register_model
+def pvt_v2_b3(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b3 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b3"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PVTV2-b4 model. Refer to the base class "models.PVTv2" for more details.

+ +
+ Source code in mindcv/models/pvtv2.py +
@register_model
+def pvt_v2_b4(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b4 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b4"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.pvtv2.pvt_v2_b5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get PVTV2-b5 model. Refer to the base class "models.PVTv2" for more details.

+ +
+ Source code in mindcv/models/pvtv2.py +
@register_model
+def pvt_v2_b5(
+    pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs
+) -> PyramidVisionTransformerV2:
+    """Get PVTV2-b5 model
+    Refer to the base class "models.PVTv2" for more details.
+    """
+    default_cfg = default_cfgs["pvt_v2_b5"]
+    model = PyramidVisionTransformerV2(
+        in_chans=in_channels, num_classes=num_classes,
+        patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
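The b0-b5 factories above differ only in their configuration: b0 uses embed_dims=[32, 64, 160, 256], b1-b5 use [64, 128, 320, 512], the depths grow from [2, 2, 2, 2] (b0/b1) to [3, 6, 40, 3] (b5), and b5 switches mlp_ratios to [4, 4, 4, 4]. A small sketch (illustrative, assuming mindcv is importable) that instantiates each variant and prints its parameter count:

from mindcv.models.pvtv2 import pvt_v2_b0, pvt_v2_b1, pvt_v2_b2, pvt_v2_b3, pvt_v2_b4, pvt_v2_b5

for factory in (pvt_v2_b0, pvt_v2_b1, pvt_v2_b2, pvt_v2_b3, pvt_v2_b4, pvt_v2_b5):
    model = factory(pretrained=False)
    n_params = sum(p.size for p in model.trainable_params())  # total trainable elements
    print(f"{factory.__name__}: {n_params / 1e6:.1f}M parameters")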
+
+ +

regnet

+ + + +
+ + + +

+mindcv.models.regnet.regnet_x_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
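The RegNet factories in this section all follow the same pattern: they pass what appear to be the RegNet design parameters (w_a, w_0, w_m, depth, group width) positionally to the RegNet constructor and optionally load pretrained weights. A minimal usage sketch (illustrative, assuming MindSpore and mindcv are installed):

import numpy as np
import mindspore as ms
from mindcv.models.regnet import regnet_x_12gf

model = regnet_x_12gf(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)  # (1, 1000)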
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_16gf"]
+    model = RegNet(55.59, 216, 2.1, 22, 128, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_1_6gf"]
+    model = RegNet(34.01, 80, 2.25, 18, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_32gf"]
+    model = RegNet(69.86, 320, 2.0, 23, 168, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_3_2gf"]
+    model = RegNet(26.31, 88, 2.25, 25, 48, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_400mf"]
+    model = RegNet(24.48, 24, 2.54, 22, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_4_0gf"]
+    model = RegNet(38.65, 96, 2.43, 23, 40, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_600mf"]
+    model = RegNet(36.97, 48, 2.24, 16, 24, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_6_4gf"]
+    model = RegNet(60.83, 184, 2.07, 17, 56, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_800mf"]
+    model = RegNet(35.73, 56, 2.28, 16, 16, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_x_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_x_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_x_8_0gf"]
+    model = RegNet(49.56, 80, 2.88, 23, 120, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_12gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_12gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_12gf"]
+    model = RegNet(73.36, 168, 2.37, 19, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_16gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_16gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_16gf"]
+    model = RegNet(106.23, 200, 2.48, 18, 112, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_1_6gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_1_6gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_1_6gf"]
+    model = RegNet(20.71, 48, 2.65, 27, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_200mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_200mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_200mf"]
+    model = RegNet(36.44, 24, 2.49, 13, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
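The regnet_y_* factories are built from the same generator as their regnet_x_* counterparts; the only difference visible here is the extra se_r=0.25 argument, which enables Squeeze-and-Excitation blocks. A short comparison sketch (illustrative, assuming mindcv is installed):

from mindcv.models.regnet import regnet_x_200mf, regnet_y_200mf

x_variant = regnet_x_200mf(pretrained=False)  # RegNet(36.44, 24, 2.49, 13, 8), no SE
y_variant = regnet_y_200mf(pretrained=False)  # same design parameters, with se_r=0.25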
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_32gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_32gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_32gf"]
+    model = RegNet(115.89, 232, 2.53, 20, 232, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_3_2gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_3_2gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_3_2gf"]
+    model = RegNet(42.63, 80, 2.66, 21, 24, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_400mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_400mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_400mf"]
+    model = RegNet(27.89, 48, 2.09, 16, 8, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_4_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_4_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_4_0gf"]
+    model = RegNet(31.41, 96, 2.24, 22, 64, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_600mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_600mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_600mf"]
+    model = RegNet(32.54, 48, 2.32, 15, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_6_4gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_6_4gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_6_4gf"]
+    model = RegNet(33.22, 112, 2.27, 25, 72, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_800mf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_800mf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_800mf"]
+    model = RegNet(38.84, 56, 2.4, 14, 16, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.regnet.regnet_y_8_0gf(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/regnet.py +
@register_model
+def regnet_y_8_0gf(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["regnet_y_8_0gf"]
+    model = RegNet(76.82, 192, 2.19, 17, 56, se_r=0.25, num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +

repmlp

+ + +
+ + + +

+ mindcv.models.repmlp.RepMLPNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

RepMLPNet model class, based on "RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
in_channels +
+

number of input channels. Default: 3.

+
+

+ + DEFAULT: + 3 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+

+
patch_size +
+

size of a single image patch. Default: (4, 4)

+
+

+ + DEFAULT: + (4, 4) + +

+
num_blocks +
+

number of blocks per stage. Default: (2,2,6,2)

+
+

+ + DEFAULT: + (2, 2, 6, 2) + +

+
channels +
+

number of in_channels (channels[stage_idx]) and out_channels (channels[stage_idx + 1]) per stage. Default: (192, 384, 768, 1536)

+
+

+ + DEFAULT: + (192, 384, 768, 1536) + +

+
hs +
+

feature-map height at each stage. Default: (64, 32, 16, 8)

+
+

+ + DEFAULT: + (64, 32, 16, 8) + +

+
ws +
+

feature-map width at each stage. Default: (64, 32, 16, 8)

+
+

+ + DEFAULT: + (64, 32, 16, 8) + +

+
sharesets_nums +
+

number of share sets per stage. Default: (4,8,16,32)

+
+

+ + DEFAULT: + (4, 8, 16, 32) + +

+
reparam_conv_k +
+

convolution kernel size in local Perceptron. Default: (3,)

+
+

+ + DEFAULT: + (3) + +

+
globalperceptron_reduce +
+

channel reduction ratio of the intermediate layer in the global perceptron (out_channel = in_channel / globalperceptron_reduce). Default: 4

+
+

+ + DEFAULT: + 4 + +

+
use_checkpoint +
+

whether to use checkpoint

+
+

+ + DEFAULT: + False + +

+
deploy +
+

whether to build the blocks in their re-parameterized (deploy) form

+
+

+ + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/repmlp.py +
class RepMLPNet(nn.Cell):
+    r"""RepMLPNet model class, based on
+    `"RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality" <https://arxiv.org/pdf/2112.11081v2.pdf>`_
+
+    Args:
+        in_channels: number of input channels. Default: 3.
+        num_classes: number of classification classes. Default: 1000.
+        patch_size: size of a single image patch. Default: (4, 4)
+        num_blocks: number of blocks per stage. Default: (2,2,6,2)
+        channels: number of in_channels(channels[stage_idx]) and out_channels(channels[stage_idx + 1]) per stage.
+            Default: (192,384,768,1536)
+        hs: feature-map height at each stage. Default: (64,32,16,8)
+        ws: feature-map width at each stage. Default: (64,32,16,8)
+        sharesets_nums: number of share sets per stage. Default: (4,8,16,32)
+        reparam_conv_k: convolution kernel size in local Perceptron. Default: (3,)
+        globalperceptron_reduce: channel reduction ratio of the intermediate layer in the global perceptron
+            (out_channel = in_channel / globalperceptron_reduce). Default: 4
+        use_checkpoint: whether to use checkpoint
+        deploy: whether to build the blocks in their re-parameterized (deploy) form
+    """
+
+    def __init__(self,
+                 in_channels=3, num_class=1000,
+                 patch_size=(4, 4),
+                 num_blocks=(2, 2, 6, 2), channels=(192, 384, 768, 1536),
+                 hs=(64, 32, 16, 8), ws=(64, 32, 16, 8),
+                 sharesets_nums=(4, 8, 16, 32),
+                 reparam_conv_k=(3,),
+                 globalperceptron_reduce=4, use_checkpoint=False,
+                 deploy=False):
+        super().__init__()
+        num_stages = len(num_blocks)
+        assert num_stages == len(channels)
+        assert num_stages == len(hs)
+        assert num_stages == len(ws)
+        assert num_stages == len(sharesets_nums)
+
+        self.conv_embedding = conv_bn_relu(in_channels, channels[0], kernel_size=patch_size, stride=patch_size,
+                                           padding=0, has_bias=False)
+        self.conv2d = nn.Conv2d(in_channels, channels[0], kernel_size=patch_size, stride=patch_size, padding=0)
+
+        stages = []
+        embeds = []
+        for stage_idx in range(num_stages):
+            stage_blocks = [RepMLPNetUnit(channels=channels[stage_idx], h=hs[stage_idx], w=ws[stage_idx],
+                                          reparam_conv_k=reparam_conv_k,
+                                          globalperceptron_reduce=globalperceptron_reduce, ffn_expand=4,
+                                          num_sharesets=sharesets_nums[stage_idx],
+                                          deploy=deploy) for _ in range(num_blocks[stage_idx])]
+            stages.append(nn.CellList(stage_blocks))
+            if stage_idx < num_stages - 1:
+                embeds.append(
+                    conv_bn_relu(in_channels=channels[stage_idx], out_channels=channels[stage_idx + 1], kernel_size=2,
+                                 stride=2, padding=0))
+        self.stages = nn.CellList(stages)
+        self.embeds = nn.CellList(embeds)
+        self.head_norm = nn.BatchNorm2d(channels[-1]).set_train()
+        self.head = nn.Dense(channels[-1], num_class)
+
+        self.use_checkpoint = use_checkpoint
+        self.shape = ops.Shape()
+        self.reshape = ops.Reshape()
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                k = cell.group / (cell.in_channels * cell.kernel_size[0] * cell.kernel_size[1])
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                k = 1 / cell.in_channels
+                k = k ** 0.5
+                cell.weight.set_data(init.initializer(init.Uniform(k), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Uniform(k), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv_embedding(x)
+
+        for i, stage in enumerate(self.stages):
+            for block in stage:
+                x = block(x)
+
+            if i < len(self.stages) - 1:
+                embed = self.embeds[i]
+                x = embed(x)
+        x = self.head_norm(x)
+        shape = self.shape(x)
+        pool = nn.AvgPool2d(kernel_size=(shape[2], shape[3]))
+        x = pool(x)
+        return x.view(shape[0], -1)
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        return self.head(x)
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        return self.forward_head(x)
+
+
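A minimal usage sketch for the class above (illustrative, not part of the generated source; it assumes MindSpore is installed). Note that hs/ws fix the spatial size of every stage, so the input resolution must match them: with the default patch_size=(4, 4) and hs[0] = ws[0] = 64 the expected input is 256x256, and the constructor argument is named num_class rather than num_classes:

import numpy as np
import mindspore as ms
from mindcv.models.repmlp import RepMLPNet

model = RepMLPNet(in_channels=3, num_class=1000)  # defaults: hs = ws = (64, 32, 16, 8)
x = ms.Tensor(np.random.randn(1, 3, 256, 256), ms.float32)  # 256 = 64 * patch_size[0]
logits = model(x)  # (1, 1000)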

+mindcv.models.repmlp.repmlp_b224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b224 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_b224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b224 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
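A usage sketch for the factory above (illustrative, assuming mindcv is installed). repmlp_b224 is configured for 224x224 inputs, since hs = ws = (56, 28, 14, 7) and 56 * 4 = 224 with the default patch size:

import numpy as np
import mindspore as ms
from mindcv.models.repmlp import repmlp_b224

model = repmlp_b224(pretrained=False, num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)  # (1, 1000)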
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_b256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_b256 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_b256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_b256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_b256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 12, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_d256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_d256 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_d256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_d256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_d256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(80, 160, 320, 640), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_l256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_l256 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_l256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_l256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_l256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(96, 192, 384, 768), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 18, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 32, 256),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t224(pretrained=False, image_size=224, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t224 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_t224(pretrained: bool = False, image_size: int = 224, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t224 model. Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t224"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(56, 28, 14, 7),
+                      ws=(56, 28, 14, 7),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repmlp.repmlp_t256(pretrained=False, image_size=256, num_classes=1000, in_channels=3, deploy=False, **kwargs) + +

+ + +
+ +

Get repmlp_t256 model. Refer to the base class models.RepMLPNet for more details.

+ +
+ Source code in mindcv/models/repmlp.py +
@register_model
+def repmlp_t256(pretrained: bool = False, image_size: int = 256, num_classes: int = 1000, in_channels=3,
+                deploy=False, **kwargs):
+    """Get repmlp_t256 model.
+    Refer to the base class `models.RepMLPNet` for more details."""
+    default_cfg = default_cfgs["repmlp_t256"]
+    model = RepMLPNet(in_channels=in_channels, num_class=num_classes, channels=(64, 128, 256, 512), hs=(64, 32, 16, 8),
+                      ws=(64, 32, 16, 8),
+                      num_blocks=(2, 2, 6, 2), reparam_conv_k=(1, 3), sharesets_nums=(1, 4, 16, 128),
+                      deploy=deploy)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

repvgg

+ + +
+ + + +

+ mindcv.models.repvgg.RepVGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

RepVGG model class, based on "RepVGG: Making VGG-style ConvNets Great Again" <https://arxiv.org/pdf/2101.03697>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_blocks +
+

number of RepVGGBlocks

+
+

+ + TYPE: + list) + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int) + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + in_channels) + + + DEFAULT: + 3 + +

+
width_multiplier +
+

per-stage width scaling factors for the four stages.

+
+

+ + TYPE: + list) + + + DEFAULT: + None + +

+
override_group_map +
+

mapping from block index to the number of groups used in that block's convolution.

+
+

+ + TYPE: + dict) + + + DEFAULT: + None + +

+
deploy +
+

use rbr_reparam block or not. Default: False

+
+

+ + TYPE: + bool) + + + DEFAULT: + False + +

+
use_se +
+

use se_block or not. Default: False

+
+

+ + TYPE: + bool) + + + DEFAULT: + False + +

+
+ +
+ Source code in mindcv/models/repvgg.py +
class RepVGG(nn.Cell):
+    r"""RepVGG model class, based on
+    `"RepVGGBlock: An all-MLP Architecture for Vision" <https://arxiv.org/pdf/2101.03697>`_
+
+    Args:
+        num_blocks (list) : number of RepVGGBlocks
+        num_classes (int) : number of classification classes. Default: 1000.
+        in_channels (in_channels) : number the channels of the input. Default: 3.
+        width_multiplier (list) : the numbers of MLP Architecture.
+        override_group_map (dict) : the numbers of MLP Architecture.
+        deploy (bool) : use rbr_reparam block or not. Default: False
+        use_se (bool) : use se_block or not. Default: False
+    """
+
+    def __init__(self, num_blocks, num_classes=1000, in_channels=3, width_multiplier=None, override_group_map=None,
+                 deploy=False, use_se=False):
+        super().__init__()
+
+        assert len(width_multiplier) == 4
+
+        self.deploy = deploy
+        self.override_group_map = override_group_map or {}
+        self.use_se = use_se
+
+        assert 0 not in self.override_group_map
+
+        self.in_planes = min(64, int(64 * width_multiplier[0]))
+
+        self.stage0 = RepVGGBlock(in_channels=in_channels, out_channels=self.in_planes, kernel_size=3, stride=2,
+                                  padding=1,
+                                  deploy=self.deploy, use_se=self.use_se)
+        self.feature_info = [dict(chs=self.in_planes, reduction=2, name="stage0")]
+        self.cur_layer_idx = 1
+        self.stage1 = self._make_stage(
+            int(64 * width_multiplier[0]), num_blocks[0], stride=2)
+        self.feature_info.append(dict(chs=int(64 * width_multiplier[0]), reduction=4, name="stage1"))
+        self.stage2 = self._make_stage(
+            int(128 * width_multiplier[1]), num_blocks[1], stride=2)
+        self.feature_info.append(dict(chs=int(128 * width_multiplier[1]), reduction=8, name="stage2"))
+        self.stage3 = self._make_stage(
+            int(256 * width_multiplier[2]), num_blocks[2], stride=2)
+        self.feature_info.append(dict(chs=int(256 * width_multiplier[2]), reduction=16, name="stage3"))
+        self.stage4 = self._make_stage(
+            int(512 * width_multiplier[3]), num_blocks[3], stride=2)
+        self.feature_info.append(dict(chs=int(512 * width_multiplier[3]), reduction=32, name="stage4"))
+        self.gap = GlobalAvgPooling()
+        self.linear = nn.Dense(int(512 * width_multiplier[3]), num_classes)
+        self._initialize_weights()
+
+    def _make_stage(self, planes, num_blocks, stride):
+        strides = [stride] + [1] * (num_blocks - 1)
+        blocks = []
+        for s in strides:
+            cur_group = self.override_group_map.get(self.cur_layer_idx, 1)
+            blocks.append(RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3,
+                                      stride=s, padding=1, group=cur_group, deploy=self.deploy,
+                                      use_se=self.use_se))
+            self.in_planes = planes
+            self.cur_layer_idx += 1
+
+        return nn.SequentialCell(blocks)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def construct(self, x):
+        x = self.stage0(x)
+        x = self.stage1(x)
+        x = self.stage2(x)
+        x = self.stage3(x)
+        x = self.stage4(x)
+        x = self.gap(x)
+        x = self.linear(x)
+        return x
+
+
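A minimal usage sketch for the class above (illustrative, not part of the generated source; it assumes MindSpore is installed). deploy=False builds the training-time multi-branch blocks, while deploy=True builds the re-parameterized single-branch (rbr_reparam) blocks intended for inference:

import numpy as np
import mindspore as ms
from mindcv.models.repvgg import RepVGG

# RepVGG-A0 configuration, copied from the repvgg_a0 factory below
model = RepVGG(num_blocks=[2, 4, 14, 1], num_classes=1000, in_channels=3,
               width_multiplier=[0.75, 0.75, 0.75, 2.5], deploy=False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
logits = model(x)  # (1, 1000)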

+mindcv.models.repvgg.repvgg_a0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_a0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[0.75, 0.75, 0.75, 2.5].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_a0"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[0.75, 0.75, 0.75, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
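As a usage sketch (illustrative; the registered model name is assumed to match the function name, which is what the @register_model decorator above suggests):

import mindcv

model = mindcv.create_model("repvgg_a0", pretrained=False, num_classes=1000)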
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_a1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a1"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_a2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_a2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[2, 4, 14, 1], width_multiplier=[1.5, 1.5, 1.5, 2.75].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs["repvgg_a2"]
+    model_args = dict(num_blocks=[2, 4, 14, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.5, 1.5, 1.5, 2.75], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[1.0, 1.0, 1.0, 2.5].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b0']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[1.0, 1.0, 1.0, 2.5], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b1']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b1g2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g2"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g2_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b1g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b1g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.0, 2.0, 2.0, 4.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b1g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.0, 2.0, 2.0, 4.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b2']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b2g4(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b2g4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[2.5, 2.5, 2.5, 5.0].
+    Refer to the base class `models.RepVGG` for more details.
+    """
+    default_cfg = default_cfgs["repvgg_b2g4"]
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[2.5, 2.5, 2.5, 5.0], override_group_map=g4_map, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.repvgg.repvgg_b3(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0]. Refer to the base class models.RepVGG for more details.

+ +
+ Source code in mindcv/models/repvgg.py +
@register_model
+def repvgg_b3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> RepVGG:
+    """Get RepVGG model with num_blocks=[4, 6, 16, 1], width_multiplier=[3.0, 3.0, 3.0, 5.0].
+     Refer to the base class `models.RepVGG` for more details.
+     """
+    default_cfg = default_cfgs['repvgg_b3']
+    model_args = dict(num_blocks=[4, 6, 16, 1], num_classes=num_classes, in_channels=in_channels,
+                      width_multiplier=[3.0, 3.0, 3.0, 5.0], override_group_map=None, deploy=False, **kwargs)
+    return _create_repvgg(pretrained, **dict(default_cfg=default_cfg, **model_args))
+
+
+
+ +

res2net

+ + +
+ + + +

+ mindcv.models.res2net.Res2Net + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Res2Net model class, based on "Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block of resnet.

+
+

+ + TYPE: + Type[nn.Cell] + +

+
layer_nums +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
version +
+

variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'res2net' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width of per-group hidden channels in blocks. Default: 26.

+
+

+ + TYPE: + int + + + DEFAULT: + 26 + +

+
scale +
+

scale factor of Bottle2neck. Default: 4.

+
+

+ + DEFAULT: + 4 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
+ +
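A minimal usage sketch (illustrative, not part of the generated docs; "res2net50" is an assumed registered name for one of this module's factories, so adjust it to a variant actually listed here):

import mindcv

model = mindcv.create_model("res2net50", pretrained=False, num_classes=1000)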
+ Source code in mindcv/models/res2net.py +
class Res2Net(nn.Cell):
+    r"""Res2Net model class, based on
+    `"Res2Net: A New Multi-scale Backbone Architecture" <https://arxiv.org/abs/1904.01169>`_
+
+    Args:
+        block: block of resnet.
+        layer_nums: number of layers of each stage.
+        version: variety of Res2Net, 'res2net' or 'res2net_v1b'. Default: 'res2net'.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of per-group hidden channels in blocks. Default: 26.
+        scale: scale factor of Bottle2neck. Default: 4.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layer_nums: List[int],
+        version: str = "res2net",
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 26,
+        scale=4,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        assert version in ["res2net", "res2net_v1b"]
+        self.version = version
+
+        if norm is None:
+            norm = nn.BatchNorm2d
+        self.norm = norm
+
+        self.num_classes = num_classes
+        self.input_channels = 64
+        self.groups = groups
+        self.base_width = base_width
+        self.scale = scale
+        if self.version == "res2net":
+            self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                                   stride=2, padding=3, pad_mode="pad")
+        elif self.version == "res2net_v1b":
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(in_channels, self.input_channels // 2, kernel_size=3,
+                          stride=2, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels // 2, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+                norm(self.input_channels // 2),
+                nn.ReLU(),
+                nn.Conv2d(self.input_channels // 2, self.input_channels, kernel_size=3,
+                          stride=1, padding=1, pad_mode="pad"),
+            ])
+
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.max_pool = nn.SequentialCell([
+            nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT"),
+            nn.MaxPool2d(kernel_size=3, stride=2)
+        ])
+        self.layer1 = self._make_layer(block, 64, layer_nums[0])
+        self.layer2 = self._make_layer(block, 128, layer_nums[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layer_nums[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layer_nums[3], stride=2)
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[nn.Cell],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            if stride == 1 or self.version == "res2net":
+                down_sample = nn.SequentialCell([
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                    self.norm(channels * block.expansion)
+                ])
+            else:
+                down_sample = nn.SequentialCell([
+                    nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="same"),
+                    nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=1),
+                    self.norm(channels * block.expansion)
+                ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_width,
+                scale=self.scale,
+                stype="stage",
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    scale=self.scale,
+                    norm=self.norm,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
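
A minimal usage sketch for the class above, assuming MindSpore and MindCV are installed and that Bottle2neck can be imported from mindcv.models.res2net, as the factory functions further below do; the layer_nums [3, 4, 6, 3] mirror the 50-layer configuration and the 10-class head is arbitrary.

import numpy as np
import mindspore as ms
from mindcv.models.res2net import Res2Net, Bottle2neck  # Bottle2neck is the block passed in by the factories below

# Build a Res2Net-50-style backbone with a small custom classification head.
net = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net", num_classes=10, in_channels=3)
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(x)      # construct() runs forward_features followed by forward_head
print(logits.shape)  # expected: (1, 10)
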

mindcv.models.res2net.res2net101(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Get 101 layers Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py, lines 326-337

@register_model
+def res2net101(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 101 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net101"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.res2net.res2net101_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Source code in mindcv/models/res2net.py, lines 366-375

@register_model
+def res2net101_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net101_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 23, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.res2net.res2net152(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Get 152 layers Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py, lines 340-351

@register_model
+def res2net152(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 152 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net152"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.res2net.res2net152_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Source code in mindcv/models/res2net.py, lines 378-387

@register_model
+def res2net152_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net152_v1b"]
+    model = Res2Net(Bottle2neck, [3, 8, 36, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.res2net.res2net50(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Get 50 layers Res2Net model. Refer to the base class models.Res2Net for more details.

Source code in mindcv/models/res2net.py, lines 312-323

@register_model
+def res2net50(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    """Get 50 layers Res2Net model.
+    Refer to the base class `models.Res2Net` for more details.
+    """
+    default_cfg = default_cfgs["res2net50"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
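
A usage sketch for the factory above. The direct import path follows the module shown on this page; the mindcv.create_model call is an assumption based on the @register_model decorator and MindCV's model registry, not something stated here.

import mindcv
from mindcv.models.res2net import res2net50

model = res2net50(pretrained=False)                               # default head: 1001 classes, per the signature above
model_again = mindcv.create_model("res2net50", pretrained=False)  # assumed registry name matching the function name
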

mindcv.models.res2net.res2net50_v1b(pretrained=False, num_classes=1001, in_channels=3, **kwargs)

Source code in mindcv/models/res2net.py, lines 354-363

@register_model
+def res2net50_v1b(pretrained: bool = False, num_classes: int = 1001, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["res2net50_v1b"]
+    model = Res2Net(Bottle2neck, [3, 4, 6, 3], version="res2net_v1b", num_classes=num_classes,
+                    in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

resnest

mindcv.models.resnest.ResNeSt

Bases: nn.Cell

ResNeSt model class, based on "ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>

Parameters:
    block (Type[Bottleneck]): Class for the residual block. Option is Bottleneck.
    layers (List[int]): Numbers of layers in each block.
    radix (int): Number of groups for the Split-Attention conv. Default: 1.
    group (int): Number of groups for the conv in each bottleneck block. Default: 1.
    bottleneck_width (int): Bottleneck channels factor. Default: 64.
    num_classes (int): Number of classification classes. Default: 1000.
    dilated (bool): Apply a dilation strategy to pretrained ResNeSt, yielding a stride-8 model, typically used in semantic segmentation. Default: False.
    dilation (int): Number of dilation in the conv. Default: 1.
    deep_stem (bool): Use three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2 in the stem. Default: False.
    stem_width (int): Number of channels in stem convolutions. Default: 64.
    avg_down (bool): Use avg pooling for the projection skip connection (downsample) between stages. Default: False.
    avd (bool): Use an avg pooling layer for downsampling inside the block, placed before or after the split-attention conv. Default: False.
    avd_first (bool): If True, place the avg pooling before the split-attention conv; otherwise after it. Default: False.
    drop_rate (float): Drop probability for the Dropout layer. Default: 0.0.
    norm_layer (nn.Cell): Normalization layer used in the backbone network. Default: nn.BatchNorm2d.

Source code in mindcv/models/resnest.py, lines 225-457

class ResNeSt(nn.Cell):
+    r"""ResNeSt model class, based on
+    `"ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>`_
+
+    Args:
+        block: Class for the residual block. Option is Bottleneck.
+        layers: Numbers of layers in each block.
+        radix: Number of groups for Split-Attention conv. Default: 1.
+        group: Number of groups for the conv in each bottleneck block. Default: 1.
+        bottleneck_width: bottleneck channels factor. Default: 64.
+        num_classes: Number of classification classes. Default: 1000.
+        dilated: Applying dilation strategy to pretrained ResNeSt yielding a stride-8 model,
+                 typically used in Semantic Segmentation. Default: False.
+        dilation: Number of dilation in the conv. Default: 1.
+        deep_stem: three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2.
+                   Default: False.
+        stem_width: number of channels in stem convolutions. Default: 64.
+        avg_down: use avg pooling for projection skip connection between stages/downsample.
+                  Default: False.
+        avd: use avg pooling before or after split-attention conv. Default: False.
+        avd_first: use avg pooling before or after split-attention conv. Default: False.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        norm_layer: Normalization layer used in backbone network. Default: nn.BatchNorm2d.
+    """
+
+    def __init__(
+        self,
+        block: Type[Bottleneck],
+        layers: List[int],
+        radix: int = 1,
+        group: int = 1,
+        bottleneck_width: int = 64,
+        num_classes: int = 1000,
+        dilated: bool = False,
+        dilation: int = 1,
+        deep_stem: bool = False,
+        stem_width: int = 64,
+        avg_down: bool = False,
+        avd: bool = False,
+        avd_first: bool = False,
+        drop_rate: float = 0.0,
+        norm_layer: nn.Cell = nn.BatchNorm2d,
+    ) -> None:
+        super(ResNeSt, self).__init__()
+        self.cardinality = group
+        self.bottleneck_width = bottleneck_width
+        # ResNet-D params
+        self.inplanes = stem_width * 2 if deep_stem else 64
+        self.avg_down = avg_down
+        # ResNeSt params
+        self.radix = radix
+        self.avd = avd
+        self.avd_first = avd_first
+
+        if deep_stem:
+            self.conv1 = nn.SequentialCell([
+                nn.Conv2d(3, stem_width, kernel_size=3, stride=2, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+                norm_layer(stem_width),
+                nn.ReLU(),
+                nn.Conv2d(stem_width, stem_width * 2, kernel_size=3, stride=1, pad_mode="pad",
+                          padding=1, has_bias=False),
+            ])
+        else:
+            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode="pad", padding=3,
+                                   has_bias=False)
+
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.inplanes, reduction=2, name="relu")]
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, is_first=False)
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name='layer1'))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name='layer2'))
+
+        if dilated or dilation == 4:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=8, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=8, name='layer4'))
+        elif dilation == 2:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=16, name='layer4'))
+        else:
+            self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name='layer3'))
+            self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer)
+            self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name='layer4'))
+
+        self.avgpool = GlobalAvgPooling()
+        self.drop = Dropout(p=drop_rate) if drop_rate > 0.0 else None
+        self.fc = nn.Dense(512 * block.expansion, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(
+                        init.HeUniform(mode="fan_in", nonlinearity="sigmoid"), cell.weight.shape, cell.weight.dtype
+                    )
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Bottleneck],
+        planes: int,
+        blocks: int,
+        stride: int = 1,
+        dilation: int = 1,
+        norm_layer: Optional[nn.Cell] = None,
+        is_first: bool = True,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            down_layers = []
+            if self.avg_down:
+                if dilation == 1:
+                    down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride, pad_mode="valid"))
+                else:
+                    down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, pad_mode="valid"))
+
+                down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1,
+                                             stride=1, has_bias=False))
+            else:
+                down_layers.append(
+                    nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,
+                              has_bias=False))
+            down_layers.append(norm_layer(planes * block.expansion))
+            downsample = nn.SequentialCell(down_layers)
+
+        layers = []
+        if dilation == 1 or dilation == 2:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=1,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        elif dilation == 4:
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    stride,
+                    downsample=downsample,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=2,
+                    is_first=is_first,
+                    norm_layer=norm_layer,
+                )
+            )
+        else:
+            raise ValueError(f"Unsupported model type {dilation}")
+
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    radix=self.radix,
+                    cardinality=self.cardinality,
+                    bottleneck_width=self.bottleneck_width,
+                    avd=self.avd,
+                    avd_first=self.avd_first,
+                    dilation=dilation,
+                    norm_layer=norm_layer,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.avgpool(x)
+        if self.drop:
+            x = self.drop(x)
+        x = self.fc(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
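
An illustrative sketch of constructing ResNeSt directly from the class above, mirroring the resnest14 configuration used by the factory further below (radix=2, deep_stem, avg_down, avd); the 10-class head and input size are arbitrary, and MindSpore/MindCV are assumed to be installed.

import numpy as np
import mindspore as ms
from mindcv.models.resnest import ResNeSt, Bottleneck  # Bottleneck is the block used by the resnest factories below

# A tiny ResNeSt (one block per stage), built with the documented constructor arguments.
net = ResNeSt(Bottleneck, [1, 1, 1, 1], radix=2, group=1, bottleneck_width=64,
              num_classes=10, deep_stem=True, stem_width=32, avg_down=True,
              avd=True, avd_first=False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # expected: (1, 10)
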

mindcv.models.resnest.resnest101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 494-501

@register_model
+def resnest101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnest.resnest14(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 464-471

@register_model
+def resnest14(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest14"]
+    model_args = dict(block=Bottleneck, layers=[1, 1, 1, 1], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnest.resnest200(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 504-511

@register_model
+def resnest200(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest200"]
+    model_args = dict(block=Bottleneck, layers=[3, 24, 36, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnest.resnest26(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 474-481

@register_model
+def resnest26(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest26"]
+    model_args = dict(block=Bottleneck, layers=[2, 2, 2, 2], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnest.resnest269(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 514-521

@register_model
+def resnest269(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest269"]
+    model_args = dict(block=Bottleneck, layers=[3, 30, 48, 8], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=64, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnest.resnest50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnest.py, lines 484-491

@register_model
+def resnest50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnest50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], radix=2, group=1,
+                      bottleneck_width=64, num_classes=num_classes,
+                      deep_stem=True, stem_width=32, avg_down=True,
+                      avd=True, avd_first=False, **kwargs)
+    return _create_resnest(pretrained, **dict(default_cfg=default_cfg, **model_args))
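
A usage sketch for the resnest factories above; pretrained=True would additionally call load_pretrained, which assumes a released checkpoint and network access.

from mindcv.models.resnest import resnest50, resnest101

net = resnest50(pretrained=False, num_classes=100)  # swap in a 100-class classifier head
big = resnest101(pretrained=False)                  # default 1000-class head
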

resnet

mindcv.models.resnet.ResNet

Bases: nn.Cell

ResNet model class, based on "Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>

Parameters:
    block (Type[Union[BasicBlock, Bottleneck]]): Block of resnet.
    layers (List[int]): Number of layers of each stage.
    num_classes (int): Number of classification classes. Default: 1000.
    in_channels (int): Number of input channels. Default: 3.
    groups (int): Number of groups for group conv in blocks. Default: 1.
    base_width (int): Base width of per-group hidden channels in blocks. Default: 64.
    norm (Optional[nn.Cell]): Normalization layer in blocks. Default: None.

Source code in mindcv/models/resnet.py, lines 163-301

class ResNet(nn.Cell):
+    r"""ResNet model class, based on
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+    Args:
+        block: block of resnet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number the channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of pre group hidden channel in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+    ) -> None:
+        super().__init__()
+        if norm is None:
+            norm = nn.BatchNorm2d
+
+        self.norm: nn.Cell = norm  # add type hints to make pylint happy
+        self.input_channels = 64
+        self.groups = groups
+        self.base_with = base_width
+
+        self.conv1 = nn.Conv2d(in_channels, self.input_channels, kernel_size=7,
+                               stride=2, pad_mode="pad", padding=3)
+        self.bn1 = norm(self.input_channels)
+        self.relu = nn.ReLU()
+        self.feature_info = [dict(chs=self.input_channels, reduction=2, name="relu")]
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.feature_info.append(dict(chs=block.expansion * 64, reduction=4, name="layer1"))
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 128, reduction=8, name="layer2"))
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 256, reduction=16, name="layer3"))
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+        self.feature_info.append(dict(chs=block.expansion * 512, reduction=32, name="layer4"))
+
+        self.pool = GlobalAvgPooling()
+        self.num_features = 512 * block.expansion
+        self.classifier = nn.Dense(self.num_features, num_classes)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode='fan_out', nonlinearity='relu'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode='fan_in', nonlinearity='sigmoid'),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer('zeros', cell.bias.shape, cell.bias.dtype))
+
+    def _make_layer(
+        self,
+        block: Type[Union[BasicBlock, Bottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        """build model depending on cfgs"""
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        """Network forward feature extraction."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.max_pool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
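
A sketch of using the class above as a feature extractor: forward_features (documented below) returns the stage-4 feature map, while forward_head applies pooling and the classifier. MindSpore and MindCV are assumed to be installed.

import numpy as np
import mindspore as ms
from mindcv.models.resnet import resnet50

net = resnet50(pretrained=False)
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
feats = net.forward_features(x)   # stage-4 feature map
print(feats.shape)                # expected: (1, 2048, 7, 7) for 224x224 input
logits = net.forward_head(feats)  # (1, 1000) with the default head
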

mindcv.models.resnet.ResNet.forward_features(x)

Network forward feature extraction.

Source code in mindcv/models/resnet.py, lines 280-291

def forward_features(self, x: Tensor) -> Tensor:
+    """Network forward feature extraction."""
+    x = self.conv1(x)
+    x = self.bn1(x)
+    x = self.relu(x)
+    x = self.max_pool(x)
+
+    x = self.layer1(x)
+    x = self.layer2(x)
+    x = self.layer3(x)
+    x = self.layer4(x)
+    return x

mindcv.models.resnet.resnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 101 layers ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 341-349

@register_model
+def resnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet101"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 152 layers ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 352-360

@register_model
+def resnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 152 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet152"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 18 layers ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 308-316

@register_model
+def resnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 18 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet18"]
+    model_args = dict(block=BasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 34 layers ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 319-327

@register_model
+def resnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 34 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet34"]
+    model_args = dict(block=BasicBlock, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 50 layers ResNet model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 330-338

@register_model
+def resnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNet model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnet50"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                      **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))
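
A sketch of the common fine-tuning hooks shared by the resnet/resnext factories above: num_classes swaps the classifier head and in_channels changes the stem; both are plain keyword arguments of every factory on this page.

from mindcv.models.resnet import resnet18, resnet50

tiny = resnet18(pretrained=False, num_classes=10)  # 10-way classification head
gray = resnet50(pretrained=False, in_channels=1)   # single-channel (e.g. grayscale) input stem
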

mindcv.models.resnet.resnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 101 layers ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 374-382

@register_model
+def resnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnext101_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 101 layers ResNeXt model with 64 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 385-393

@register_model
+def resnext101_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNeXt model with 64 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext101_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 23, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnext152_64x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Source code in mindcv/models/resnet.py, lines 396-401

@register_model
+def resnext152_64x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["resnext152_64x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 8, 36, 3], groups=64, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

mindcv.models.resnet.resnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 50 layers ResNeXt model with 32 groups of GPConv. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnet.py, lines 363-371

@register_model
+def resnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnext50_32x4d"]
+    model_args = dict(block=Bottleneck, layers=[3, 4, 6, 3], groups=32, base_width=4, num_classes=num_classes,
+                      in_channels=in_channels, **kwargs)
+    return _create_resnet(pretrained, **dict(default_cfg=default_cfg, **model_args))

resnetv2

mindcv.models.resnetv2.resnetv2_101(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 101 layers ResNetV2 model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnetv2.py, lines 108-119

@register_model
+def resnetv2_101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 101 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs["resnetv2_101"]
+    model = ResNet(PreActBottleneck, [3, 4, 23, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model

mindcv.models.resnetv2.resnetv2_50(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get 50 layers ResNetV2 model. Refer to the base class models.ResNet for more details.

Source code in mindcv/models/resnetv2.py, lines 94-105

@register_model
+def resnetv2_50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """Get 50 layers ResNetV2 model.
+    Refer to the base class `models.ResNet` for more details.
+    """
+    default_cfg = default_cfgs['resnetv2_50']
+    model = ResNet(PreActBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
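
A usage sketch: as the source above shows, the resnetv2 factories reuse the ResNet container with pre-activation (PreActBottleneck) blocks, so they are called the same way as the resnet factories earlier on this page.

from mindcv.models.resnetv2 import resnetv2_50

net = resnetv2_50(pretrained=False, num_classes=1000)  # same keyword arguments as the resnet factories
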

rexnet

mindcv.models.rexnet.ReXNetV1

Bases: nn.Cell

ReXNet model class, based on "Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>

Parameters:
    in_channels (int): Number of the input channels. Default: 3.
    fi_channels (int): Number of the final channels. Default: 180.
    initial_channels (int): Initialize inplanes. Default: 16.
    width_mult (float): The ratio of the channel. Default: 1.0.
    depth_mult (float): The ratio of num_layers. Default: 1.0.
    num_classes (int): Number of classification classes. Default: 1000.
    use_se (bool): Use SENet in LinearBottleneck. Default: True.
    se_ratio (float): SENet reduction ratio. Default: 1/12.
    drop_rate (float): Dropout ratio. Default: 0.2.
    ch_div (int): Divisible by ch_div. Default: 1.
    act_layer (nn.Cell): Activation function in ConvNormAct. Default: nn.SiLU.
    dw_act_layer (nn.Cell): Activation function after dw_conv. Default: nn.ReLU6.
    cls_useconv (bool): Use conv in classification. Default: False.

Source code in mindcv/models/rexnet.py, lines 106-252

class ReXNetV1(nn.Cell):
+    r"""ReXNet model class, based on
+    `"Rethinking Channel Dimensions for Efficient Model Design" <https://arxiv.org/abs/2007.00992>`_
+
+    Args:
+        in_channels (int): number of the input channels. Default: 3.
+        fi_channels (int): number of the final channels. Default: 180.
+        initial_channels (int): initialize inplanes. Default: 16.
+        width_mult (float): The ratio of the channel. Default: 1.0.
+        depth_mult (float): The ratio of num_layers. Default: 1.0.
+        num_classes (int) : number of classification classes. Default: 1000.
+        use_se (bool): use SENet in LinearBottleneck. Default: True.
+        se_ratio: (float): SENet reduction ratio. Default 1/12.
+        drop_rate (float): dropout ratio. Default: 0.2.
+        ch_div (int): divisible by ch_div. Default: 1.
+        act_layer (nn.Cell): activation function in ConvNormAct. Default: nn.SiLU.
+        dw_act_layer (nn.Cell): activation function after dw_conv. Default: nn.ReLU6.
+        cls_useconv (bool): use conv in classification. Default: False.
+    """
+
+    def __init__(
+        self,
+        in_channels=3,
+        fi_channels=180,
+        initial_channels=16,
+        width_mult=1.0,
+        depth_mult=1.0,
+        num_classes=1000,
+        use_se=True,
+        se_ratio=1 / 12,
+        drop_rate=0.2,
+        drop_path_rate=0.0,
+        ch_div=1,
+        act_layer=nn.SiLU,
+        dw_act_layer=nn.ReLU6,
+        cls_useconv=False,
+    ):
+        super(ReXNetV1, self).__init__()
+
+        layers = [1, 2, 2, 3, 3, 5]
+        strides = [1, 2, 2, 2, 1, 2]
+        use_ses = [False, False, True, True, True, True]
+
+        layers = [ceil(element * depth_mult) for element in layers]
+        strides = sum([[element] + [1] * (layers[idx] - 1)
+                       for idx, element in enumerate(strides)], [])
+        if use_se:
+            use_ses = sum([[element] * layers[idx] for idx, element in enumerate(use_ses)], [])
+        else:
+            use_ses = [False] * sum(layers[:])
+        exp_ratios = [1] * layers[0] + [6] * sum(layers[1:])
+
+        self.depth = sum(layers[:]) * 3
+        stem_channel = 32 / width_mult if width_mult < 1.0 else 32
+        inplanes = initial_channels / width_mult if width_mult < 1.0 else initial_channels
+
+        features = []
+        in_channels_group = []
+        out_channels_group = []
+
+        for i in range(self.depth // 3):
+            if i == 0:
+                in_channels_group.append(int(round(stem_channel * width_mult)))
+                out_channels_group.append(int(round(inplanes * width_mult)))
+            else:
+                in_channels_group.append(int(round(inplanes * width_mult)))
+                inplanes += fi_channels / (self.depth // 3 * 1.0)
+                out_channels_group.append(int(round(inplanes * width_mult)))
+
+        stem_chs = make_divisible(round(stem_channel * width_mult), divisor=ch_div)
+        self.stem = Conv2dNormActivation(in_channels, stem_chs, stride=2, padding=1, activation=act_layer)
+
+        feat_chs = [stem_chs]
+        self.feature_info = []
+        curr_stride = 2
+        features = []
+        num_blocks = len(in_channels_group)
+        for block_idx, (in_c, out_c, exp_ratio, stride, use_se) in enumerate(
+            zip(in_channels_group, out_channels_group, exp_ratios, strides, use_ses)
+        ):
+            if stride > 1:
+                fname = "stem" if block_idx == 0 else f"features.{block_idx - 1}"
+                self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=fname)]
+            block_dpr = drop_path_rate * block_idx / (num_blocks - 1)  # stochastic depth linear decay rule
+            drop_path = DropPath(block_dpr) if block_dpr > 0. else None
+            features.append(LinearBottleneck(in_channels=in_c,
+                                             out_channels=out_c,
+                                             exp_ratio=exp_ratio,
+                                             stride=stride,
+                                             use_se=use_se,
+                                             se_ratio=se_ratio,
+                                             act_layer=act_layer,
+                                             dw_act_layer=dw_act_layer,
+                                             drop_path=drop_path))
+            curr_stride *= stride
+            feat_chs.append(out_c)
+
+        pen_channels = make_divisible(int(1280 * width_mult), divisor=ch_div)
+        self.feature_info += [dict(chs=feat_chs[-1], reduction=curr_stride, name=f'features.{len(features) - 1}')]
+        self.flatten_sequential = True
+        features.append(Conv2dNormActivation(out_channels_group[-1],
+                                             pen_channels,
+                                             kernel_size=1,
+                                             activation=act_layer))
+
+        features.append(GlobalAvgPooling(keep_dims=True))
+        self.useconv = cls_useconv
+        self.features = nn.SequentialCell(*features)
+        if self.useconv:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Conv2d(pen_channels, num_classes, 1, has_bias=True))
+        else:
+            self.cls = nn.SequentialCell(
+                Dropout(p=drop_rate),
+                nn.Dense(pen_channels, num_classes))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, (nn.Conv2d, nn.Dense)):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer(init.HeUniform(math.sqrt(5), mode="fan_in", nonlinearity="leaky_relu"),
+                                         [1, cell.bias.shape[0]], cell.bias.dtype).reshape((-1)))
+
+    def forward_features(self, x):
+        x = self.stem(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x):
+        if not self.useconv:
+            x = x.reshape((x.shape[0], -1))
+            x = self.cls(x)
+        else:
+            x = self.cls(x).reshape((x.shape[0], -1))
+        return x
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
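
An illustrative sketch of the class above: width_mult scales the channel widths, which is exactly what the rexnet_09 through rexnet_20 factories below fix; the 10-class head is arbitrary, and MindSpore/MindCV are assumed to be installed.

import numpy as np
import mindspore as ms
from mindcv.models.rexnet import ReXNetV1

# width_mult=1.0 corresponds to the rexnet_10 configuration below.
net = ReXNetV1(width_mult=1.0, num_classes=10, drop_rate=0.2)
x = ms.Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(x).shape)  # expected: (1, 10)
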

mindcv.models.rexnet.rexnet_09(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ReXNet model with width multiplier of 0.9. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py, lines 269-274

@register_model
+def rexnet_09(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 0.9.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_09", 0.9, in_channels, num_classes, pretrained, **kwargs)

mindcv.models.rexnet.rexnet_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ReXNet model with width multiplier of 1.0. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py, lines 277-282

@register_model
+def rexnet_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_10", 1.0, in_channels, num_classes, pretrained, **kwargs)

mindcv.models.rexnet.rexnet_13(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ReXNet model with width multiplier of 1.3. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py, lines 285-290

@register_model
+def rexnet_13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.3.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_13", 1.3, in_channels, num_classes, pretrained, **kwargs)

mindcv.models.rexnet.rexnet_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ReXNet model with width multiplier of 1.5. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py, lines 293-298

@register_model
+def rexnet_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 1.5.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_15", 1.5, in_channels, num_classes, pretrained, **kwargs)

mindcv.models.rexnet.rexnet_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get ReXNet model with width multiplier of 2.0. Refer to the base class models.ReXNetV1 for more details.

Source code in mindcv/models/rexnet.py, lines 301-306

@register_model
+def rexnet_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ReXNetV1:
+    """Get ReXNet model with width multiplier of 2.0.
+    Refer to the base class `models.ReXNetV1` for more details.
+    """
+    return _rexnet("rexnet_20", 2.0, in_channels, num_classes, pretrained, **kwargs)
+
+
+
+ +

senet

+ + +
+ + + +

+ mindcv.models.senet.SENet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SENet model class, based on +"Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>_

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block class of SENet.

+
+

+ + TYPE: + Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]] + +

+
layers +
+

Number of residual blocks for 4 layers.

+
+

+ + TYPE: + List[int] + +

+
group +
+

Number of groups for the conv in each bottleneck block.

+
+

+ + TYPE: + int + +

+
reduction +
+

Reduction ratio for Squeeze-and-Excitation modules.

+
+

+ + TYPE: + int + +

+
drop_rate +
+

Drop probability for the Dropout layer. Default: 0.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
inplanes +
+

Number of input channels for layer1. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
input3x3 +
+

If True, use three 3x3 convolutions in layer0. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
downsample_kernel_size +
+

Kernel size for downsampling convolutions. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
downsample_padding +
+

Padding for downsampling convolutions. Default: 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
+ +
+ Source code in mindcv/models/senet.py
class SENet(nn.Cell):
+    r"""SENet model class, based on
+    `"Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>`_
+
+    Args:
+        block: block class of SENet.
+        layers: Number of residual blocks for 4 layers.
+        group: Number of groups for the conv in each bottleneck block.
+        reduction: Reduction ratio for Squeeze-and-Excitation modules.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        in_channels: number the channels of the input. Default: 3.
+        inplanes:  Number of input channels for layer1. Default: 64.
+        input3x3: If `True`, use three 3x3 convolutions in layer0. Default: False.
+        downsample_kernel_size: Kernel size for downsampling convolutions. Default: 1.
+        downsample_padding: Padding for downsampling convolutions. Default: 0.
+        num_classes (int): number of classification classes. Default: 1000.
+    """
+
+    def __init__(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        layers: List[int],
+        group: int,
+        reduction: int,
+        drop_rate: float = 0.0,
+        in_channels: int = 3,
+        inplanes: int = 64,
+        input3x3: bool = False,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+        num_classes: int = 1000,
+    ) -> None:
+        super(SENet, self).__init__()
+        self.inplanes = inplanes
+        self.num_classes = num_classes
+        self.drop_rate = drop_rate
+        if input3x3:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, 3, stride=2, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, 64, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(64),
+                nn.ReLU(),
+                nn.Conv2d(64, inplanes, 3, stride=1, pad_mode="pad", padding=1, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        else:
+            self.layer0 = nn.SequentialCell([
+                nn.Conv2d(in_channels, inplanes, kernel_size=7, stride=2, pad_mode="pad",
+                          padding=3, has_bias=False),
+                nn.BatchNorm2d(inplanes),
+                nn.ReLU()
+            ])
+        self.pool0 = nn.MaxPool2d(3, stride=2, pad_mode="same")
+
+        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0], group=group,
+                                       reduction=reduction, downsample_kernel_size=1,
+                                       downsample_padding=0)
+
+        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2,
+                                       group=group, reduction=reduction,
+                                       downsample_kernel_size=downsample_kernel_size,
+                                       downsample_padding=downsample_padding)
+
+        self.num_features = 512 * block.expansion
+
+        self.pool = GlobalAvgPooling()
+        if self.drop_rate > 0.:
+            self.dropout = Dropout(p=self.drop_rate)
+        self.classifier = nn.Dense(self.num_features, self.num_classes)
+
+        self._initialize_weights()
+
+    def _make_layer(
+        self,
+        block: Type[Union[SEBottleneck, SEResNetBottleneck, SEResNetBlock, SEResNeXtBottleneck]],
+        planes: int,
+        blocks: int,
+        group: int,
+        reduction: int,
+        stride: int = 1,
+        downsample_kernel_size: int = 1,
+        downsample_padding: int = 0,
+    ) -> nn.SequentialCell:
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.SequentialCell([
+                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size,
+                          stride=stride, pad_mode="pad", padding=downsample_padding, has_bias=False),
+                nn.BatchNorm2d(planes * block.expansion)
+            ])
+
+        layers = [block(self.inplanes, planes, group, reduction, stride, downsample)]
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes, group, reduction))
+
+        return nn.SequentialCell(layers)
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.HeUniform(mode="fan_in", nonlinearity="sigmoid"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.layer0(x)
+        x = self.pool0(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.pool(x)
+        if self.drop_rate > 0.0:
+            x = self.dropout(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
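To make the constructor arguments above concrete, here is a sketch that builds an SENet by hand with the same block/layer configuration that seresnet50 uses further down this page (the import path is an assumption):

```python
# Sketch: direct construction of an SENet, mirroring the seresnet50 settings below.
from mindcv.models.senet import SENet, SEResNetBottleneck  # assumed import path

net = SENet(
    block=SEResNetBottleneck,  # bottleneck block with a Squeeze-and-Excitation module
    layers=[3, 4, 6, 3],       # residual blocks per stage
    group=1,                   # plain (non-grouped) convolutions
    reduction=16,              # SE reduction ratio
    drop_rate=0.2,             # optional dropout before the classifier
    num_classes=1000,
)
print(net.num_features)  # 512 * block.expansion
```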
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.senet.senet154(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def senet154(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["senet154"]
+    model = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], group=64, reduction=16,
+                  downsample_kernel_size=3, downsample_padding=1,  inplanes=128, input3x3=True,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet101(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnet101(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet101"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet152(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnet152(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet152"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet18"]
+    model = SENet(block=SEResNetBlock, layers=[2, 2, 2, 2], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet34"]
+    model = SENet(block=SEResNetBlock, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnet50"]
+    model = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], group=1, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext101_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnext101_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext101_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext26_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnext26_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext26_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[2, 2, 2, 2], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.senet.seresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/senet.py
@register_model
+def seresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["seresnext50_32x4d"]
+    model = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], group=32, reduction=16,
+                  num_classes=num_classes, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
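A hedged usage sketch for the registered SENet variants above; it assumes that mindcv.create_model resolves the registered names (the @register_model decorator is shown here, but the factory itself is documented elsewhere):

```python
# Sketch: create registered SENet variants by name and count their parameters.
import mindcv  # assumes mindcv.create_model accepts the names registered above

for name in ["seresnet50", "seresnext50_32x4d", "senet154"]:
    net = mindcv.create_model(name, num_classes=1000, pretrained=False)
    n_params = sum(p.size for p in net.get_parameters())
    print(f"{name}: {n_params / 1e6:.1f}M parameters")
```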

shufflenetv1

+ + +
+ + + +

+ mindcv.models.shufflenetv1.ShuffleNetV1 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV1 model class, based on "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" (https://arxiv.org/abs/1707.01083).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '2.0x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '2.0x' + +

+
group +
+

number of groups for group convolution. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/shufflenetv1.py
class ShuffleNetV1(nn.Cell):
+    r"""ShuffleNetV1 model class, based on
+    `"ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" <https://arxiv.org/abs/1707.01083>`_  # noqa: E501
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '2.0x'.
+        group: number of group for group convolution. Default: 3.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "2.0x",
+        group: int = 3,
+    ):
+        super().__init__()
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if group == 3:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 12, 120, 240, 480]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 240, 480, 960]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 360, 720, 1440]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 480, 960, 1920]
+            else:
+                raise NotImplementedError
+        elif group == 8:
+            if model_size == "0.5x":
+                self.stage_out_channels = [-1, 16, 192, 384, 768]
+            elif model_size == "1.0x":
+                self.stage_out_channels = [-1, 24, 384, 768, 1536]
+            elif model_size == "1.5x":
+                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
+            elif model_size == "2.0x":
+                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
+            else:
+                raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell(
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2, pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        )
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                stride = 2 if i == 0 else 1
+                first_group = idxstage == 0 and i == 0
+                features.append(ShuffleV1Block(input_channel, output_channel,
+                                               group=group, first_group=first_group,
+                                               mid_channels=output_channel // 4, stride=stride))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(features)
+        self.global_pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.global_pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g3_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_05"]
+    model = ShuffleNetV1(group=3, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g3_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_10"]
+    model = ShuffleNetV1(group=3, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g3_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_15"]
+    model = ShuffleNetV1(group=3, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g3_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g3_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 3 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g3_20"]
+    model = ShuffleNetV1(group=3, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_05(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g8_05(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 0.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_05"]
+    model = ShuffleNetV1(group=8, model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_10(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g8_10(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_10"]
+    model = ShuffleNetV1(group=8, model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_15(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g8_15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 1.5 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_15"]
+    model = ShuffleNetV1(group=8, model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv1.shufflenet_v1_g8_20(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv. Refer to the base class models.ShuffleNetV1 for more details.

+ +
+ Source code in mindcv/models/shufflenetv1.py
@register_model
+def shufflenet_v1_g8_20(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV1:
+    """Get ShuffleNetV1 model with width scaled by 2.0 and 8 groups of GPConv.
+    Refer to the base class `models.ShuffleNetV1` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v1_g8_20"]
+    model = ShuffleNetV1(group=8, model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
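The registered builders above follow the naming pattern shufflenet_v1_g{group}_{width}. A sketch of feature extraction with one of them (imports assumed):

```python
# Sketch: use forward_features() to get the pre-pooling feature map.
import numpy as np
import mindspore as ms
from mindcv.models import shufflenet_v1_g3_10  # assumed import path

net = shufflenet_v1_g3_10(num_classes=1000)
x = ms.Tensor(np.random.randn(1, 3, 224, 224), ms.float32)
feat = net.forward_features(x)  # output of the last ShuffleV1Block stack
print(feat.shape)  # roughly (1, 960, 7, 7) for the 1.0x / group-3 configuration
```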

shufflenetv2

+ + +
+ + + +

+ mindcv.models.shufflenetv2.ShuffleNetV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

ShuffleNetV2 model class, based on "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" (https://arxiv.org/abs/1807.11164).

+ + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of input channels. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
model_size +
+

scale factor which controls the number of channels. Default: '1.5x'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1.5x' + +

+
+ +
+ Source code in mindcv/models/shufflenetv2.py
class ShuffleNetV2(nn.Cell):
+    r"""ShuffleNetV2 model class, based on
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/abs/1807.11164>`_
+
+    Args:
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number of input channels. Default: 3.
+        model_size: scale factor which controls the number of channels. Default: '1.5x'.
+    """
+
+    def __init__(
+        self,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        model_size: str = "1.5x",
+    ):
+        super().__init__()
+
+        self.stage_repeats = [4, 8, 4]
+        self.model_size = model_size
+        if model_size == "0.5x":
+            self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif model_size == "1.0x":
+            self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif model_size == "1.5x":
+            self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif model_size == "2.0x":
+            self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
+        else:
+            raise NotImplementedError
+
+        # building first layer
+        input_channel = self.stage_out_channels[1]
+        self.first_conv = nn.SequentialCell([
+            nn.Conv2d(in_channels, input_channel, kernel_size=3, stride=2,
+                      pad_mode="pad", padding=1),
+            nn.BatchNorm2d(input_channel),
+            nn.ReLU(),
+        ])
+        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+
+        self.features = []
+        for idxstage, numrepeat in enumerate(self.stage_repeats):
+            output_channel = self.stage_out_channels[idxstage + 2]
+            for i in range(numrepeat):
+                if i == 0:
+                    self.features.append(ShuffleV2Block(input_channel, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=2))
+                else:
+                    self.features.append(ShuffleV2Block(input_channel // 2, output_channel,
+                                                        mid_channels=output_channel // 2, kernel_size=3, stride=1))
+                input_channel = output_channel
+
+        self.features = nn.SequentialCell(self.features)
+
+        self.conv_last = nn.SequentialCell([
+            nn.Conv2d(input_channel, self.stage_out_channels[-1], kernel_size=1, stride=1),
+            nn.BatchNorm2d(self.stage_out_channels[-1]),
+            nn.ReLU()
+        ])
+        self.pool = GlobalAvgPooling()
+        self.classifier = nn.Dense(self.stage_out_channels[-1], num_classes, has_bias=False)
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for name, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if "first" in name:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(
+                        init.initializer(init.Normal(1.0 / cell.weight.shape[1], 0), cell.weight.shape,
+                                         cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01, 0), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.first_conv(x)
+        x = self.max_pool(x)
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.conv_last(x)
+        x = self.pool(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x0_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 0.5. Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py
@register_model
+def shufflenet_v2_x0_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 0.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x0_5"]
+    model = ShuffleNetV2(model_size="0.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.0. Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py
@register_model
+def shufflenet_v2_x1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_0"]
+    model = ShuffleNetV2(model_size="1.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x1_5(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 1.5. Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py
@register_model
+def shufflenet_v2_x1_5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 1.5.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x1_5"]
+    model = ShuffleNetV2(model_size="1.5x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.shufflenetv2.shufflenet_v2_x2_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get ShuffleNetV2 model with width scaled by 2.0. Refer to the base class models.ShuffleNetV2 for more details.

+ +
+ Source code in mindcv/models/shufflenetv2.py
@register_model
+def shufflenet_v2_x2_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ShuffleNetV2:
+    """Get ShuffleNetV2 model with width scaled by 2.0.
+    Refer to the base class `models.ShuffleNetV2` for more details.
+    """
+    default_cfg = default_cfgs["shufflenet_v2_x2_0"]
+    model = ShuffleNetV2(model_size="2.0x", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
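A sketch of how the width variants above differ (imports assumed; the channel numbers come from the model_size table in ShuffleNetV2.__init__):

```python
# Sketch: width selection via the registered ShuffleNetV2 builders.
from mindcv.models import shufflenet_v2_x0_5, shufflenet_v2_x2_0  # assumed import path

tiny = shufflenet_v2_x0_5(num_classes=1000)
big = shufflenet_v2_x2_0(num_classes=1000)
print(tiny.stage_out_channels[-1], big.stage_out_channels[-1])  # 1024 vs. 2048 final channels
```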

sknet

+ + +
+ + + +

+ mindcv.models.sknet.SKNet + + +

+ + +
+

+ Bases: ResNet

+ + +

SKNet model class, based on "Selective Kernel Networks" (https://arxiv.org/abs/1903.06586).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
block +
+

block of sknet.

+
+

+ + TYPE: + Type[nn.Cell] + +

+
layers +
+

number of layers of each stage.

+
+

+ + TYPE: + List[int] + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
groups +
+

number of groups for group conv in blocks. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
base_width +
+

base width of per-group hidden channels in blocks. Default: 64.

+
+

+ + TYPE: + int + + + DEFAULT: + 64 + +

+
norm +
+

normalization layer in blocks. Default: None.

+
+

+ + TYPE: + Optional[nn.Cell] + + + DEFAULT: + None + +

+
sk_kwargs +
+

kwargs of selective kernel. Default: None.

+
+

+ + TYPE: + Optional[Dict] + + + DEFAULT: + None + +

+
+ +
+ Source code in mindcv/models/sknet.py
class SKNet(ResNet):
+    r"""SKNet model class, based on
+    `"Selective Kernel Networks" <https://arxiv.org/abs/1903.06586>`_
+
+    Args:
+        block: block of sknet.
+        layers: number of layers of each stage.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number the channels of the input. Default: 3.
+        groups: number of groups for group conv in blocks. Default: 1.
+        base_width: base width of pre group hidden channel in blocks. Default: 64.
+        norm: normalization layer in blocks. Default: None.
+        sk_kwargs: kwargs of selective kernel. Default: None.
+    """
+
+    def __init__(
+        self,
+        block: Type[nn.Cell],
+        layers: List[int],
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        groups: int = 1,
+        base_width: int = 64,
+        norm: Optional[nn.Cell] = None,
+        sk_kwargs: Optional[Dict] = None,
+    ) -> None:
+        self.sk_kwargs: Optional[Dict] = sk_kwargs  # make pylint happy
+        super().__init__(block, layers, num_classes, in_channels, groups, base_width, norm)
+
+    def _make_layer(
+        self,
+        block: Type[Union[SelectiveKernelBasic, SelectiveKernelBottleneck]],
+        channels: int,
+        block_nums: int,
+        stride: int = 1,
+    ) -> nn.SequentialCell:
+        down_sample = None
+
+        if stride != 1 or self.input_channels != channels * block.expansion:
+            down_sample = nn.SequentialCell([
+                nn.Conv2d(self.input_channels, channels * block.expansion, kernel_size=1, stride=stride),
+                self.norm(channels * block.expansion)
+            ])
+
+        layers = []
+        layers.append(
+            block(
+                self.input_channels,
+                channels,
+                stride=stride,
+                down_sample=down_sample,
+                groups=self.groups,
+                base_width=self.base_with,
+                norm=self.norm,
+                sk_kwargs=self.sk_kwargs,
+            )
+        )
+        self.input_channels = channels * block.expansion
+
+        for _ in range(1, block_nums):
+            layers.append(
+                block(
+                    self.input_channels,
+                    channels,
+                    groups=self.groups,
+                    base_width=self.base_with,
+                    norm=self.norm,
+                    sk_kwargs=self.sk_kwargs,
+                )
+            )
+
+        return nn.SequentialCell(layers)
+
+
+ + + +
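The sk_kwargs dictionary is forwarded unchanged to the selective-kernel blocks. The sketch below mirrors the skresnet18 settings shown further down this page (import path assumed):

```python
# Sketch: passing selective-kernel options through sk_kwargs.
from mindcv.models.sknet import SKNet, SelectiveKernelBasic  # assumed import path

sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
net = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=1000, sk_kwargs=sk_kwargs)
```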
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet18(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get the 18-layer SKNet model. Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py
@register_model
+def skresnet18(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 18 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet18"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [2, 2, 2, 2], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet34(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get the 34-layer SKNet model. Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py
@register_model
+def skresnet34(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 34 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet34"]
+    sk_kwargs = dict(rd_ratio=1 / 8, rd_divisor=16, split_input=True)
+    model = SKNet(SelectiveKernelBasic, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnet50(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get the 50-layer SKNet model. Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py
@register_model
+def skresnet50(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNet model.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnet50"]
+    sk_kwargs = dict(split_input=True)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.sknet.skresnext50_32x4d(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get the 50-layer SKNeXt model with 32 groups of GPConv. Refer to the base class models.SKNet for more details.

+ +
+ Source code in mindcv/models/sknet.py
@register_model
+def skresnext50_32x4d(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> ResNet:
+    """Get 50 layers SKNeXt model with 32 groups of GPConv.
+    Refer to the base class `models.SKNet` for more details.
+    """
+    default_cfg = default_cfgs["skresnext50_32x4d"]
+    sk_kwargs = dict(rd_ratio=1 / 16, rd_divisor=32, split_input=False)
+    model = SKNet(SelectiveKernelBottleneck, [3, 4, 6, 3], num_classes=num_classes, in_channels=in_channels,
+                  sk_kwargs=sk_kwargs, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
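A usage sketch for the registered SKNet builders above, adapting the classifier head via num_classes (import path assumed; pretrained is left False because the checkpoint URLs are not listed on this page):

```python
# Sketch: build a registered SKNet variant with a smaller classification head.
import numpy as np
import mindspore as ms
from mindcv.models import skresnext50_32x4d  # assumed import path

net = skresnext50_32x4d(pretrained=False, num_classes=37, in_channels=3)
x = ms.Tensor(np.random.randn(2, 3, 224, 224), ms.float32)
print(net(x).shape)  # expected: (2, 37)
```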

squeezenet

+ + +
+ + + +

+ mindcv.models.squeezenet.SqueezeNet + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SqueezeNet model class, based on "SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" (https://arxiv.org/abs/1602.07360).

+

Note: in contrast to the other models, SqueezeNet expects tensors with a size of N x 3 x 227 x 227, so ensure your images are sized accordingly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
version +
+

version of the architecture, '1_0' or '1_1'. Default: '1_0'.

+
+

+ + TYPE: + str + + + DEFAULT: + '1_0' + +

+
num_classes +
+

number of classification classes. Default: 1000.

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
drop_rate +
+

dropout rate of the classifier. Default: 0.5.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.5 + +

+
in_channels +
+

number of channels of the input. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
+ +
+ Source code in mindcv/models/squeezenet.py
class SqueezeNet(nn.Cell):
+    r"""SqueezeNet model class, based on
+    `"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size" <https://arxiv.org/abs/1602.07360>`_  # noqa: E501
+
+    .. note::
+        **Important**: In contrast to the other models, SqueezeNet expects tensors with a size of
+        N x 3 x 227 x 227, so ensure your images are sized accordingly.
+
+    Args:
+        version: version of the architecture, '1_0' or '1_1'. Default: '1_0'.
+        num_classes: number of classification classes. Default: 1000.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+        in_channels: number the channels of the input. Default: 3.
+    """
+
+    def __init__(
+        self,
+        version: str = "1_0",
+        num_classes: int = 1000,
+        drop_rate: float = 0.5,
+        in_channels: int = 3,
+    ) -> None:
+        super().__init__()
+        if version == "1_0":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 96, kernel_size=7, stride=2, pad_mode="valid", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(96, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                Fire(128, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 32, 128, 128),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(512, 64, 256, 256),
+            ])
+        elif version == "1_1":
+            self.features = nn.SequentialCell([
+                nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1, pad_mode="pad", has_bias=True),
+                nn.ReLU(),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(64, 16, 64, 64),
+                Fire(128, 16, 64, 64),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(128, 32, 128, 128),
+                Fire(256, 32, 128, 128),
+                nn.MaxPool2d(kernel_size=3, stride=2),
+                Fire(256, 48, 192, 192),
+                Fire(384, 48, 192, 192),
+                Fire(384, 64, 256, 256),
+                Fire(512, 64, 256, 256),
+            ])
+        else:
+            raise ValueError(f"Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected")
+
+        self.final_conv = nn.Conv2d(512, num_classes, kernel_size=1, has_bias=True)
+        self.classifier = nn.SequentialCell([
+            Dropout(p=drop_rate),
+            self.final_conv,
+            nn.ReLU(),
+            GlobalAvgPooling()
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                if cell is self.final_conv:
+                    cell.weight.set_data(init.initializer(init.Normal(), cell.weight.shape, cell.weight.dtype))
+                else:
+                    cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
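Per the note above, SqueezeNet is fed 227 x 227 inputs rather than 224 x 224. A minimal sketch (import path assumed):

```python
# Sketch: SqueezeNet forward pass with the 227 x 227 input size the note asks for.
import numpy as np
import mindspore as ms
from mindcv.models.squeezenet import SqueezeNet  # assumed import path

net = SqueezeNet(version="1_0", num_classes=1000, drop_rate=0.5, in_channels=3)
x = ms.Tensor(np.random.randn(1, 3, 227, 227), ms.float32)
print(net(x).shape)  # expected: (1, 1000)
```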
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.squeezenet.squeezenet1_0(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.0. Refer to the base class models.SqueezeNet for more details.

+ +
+ Source code in mindcv/models/squeezenet.py
@register_model
+def squeezenet1_0(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.0.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_0"]
+    model = SqueezeNet(version="1_0", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.squeezenet.squeezenet1_1(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SqueezeNet model of version 1.1. Refer to the base class models.SqueezeNet for more details.

+ +
+ Source code in mindcv/models/squeezenet.py
@register_model
+def squeezenet1_1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SqueezeNet:
+    """Get SqueezeNet model of version 1.1.
+    Refer to the base class `models.SqueezeNet` for more details.
+    """
+    default_cfg = default_cfgs["squeezenet1_1"]
+    model = SqueezeNet(version="1_1", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +

swintransformer

+ + +
+ + + +

+ mindcv.models.swintransformer.SwinTransformer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformer model class, based on "Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" (https://arxiv.org/pdf/2103.14030).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
image_size +
+

Input image size. Default 224

+
+

+ + TYPE: + int | tuple(int + + + DEFAULT: + 224 + +

+
patch_size +
+

Patch size. Default: 4

+
+

+ + TYPE: + int | tuple(int + + + DEFAULT: + 4 + +

+
in_chans +
+

Number of input image channels. Default: 3

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
num_classes +
+

Number of classes for classification head. Default: 1000

+
+

+ + TYPE: + int + + + DEFAULT: + 1000 + +

+
embed_dim +
+

Patch embedding dimension. Default: 96

+
+

+ + TYPE: + int + + + DEFAULT: + 96 + +

+
depths +
+

Depth of each Swin Transformer layer.

+
+

+ + TYPE: + tuple(int + + + DEFAULT: + None + +

+
num_heads +
+

Number of attention heads in different layers.

+
+

+ + TYPE: + tuple(int + + + DEFAULT: + None + +

+
window_size +
+

Window size. Default: 7

+
+

+ + TYPE: + int + + + DEFAULT: + 7 + +

+
mlp_ratio +
+

Ratio of mlp hidden dim to embedding dim. Default: 4

+
+

+ + TYPE: + float + + + DEFAULT: + 4.0 + +

+
qkv_bias +
+

If True, add a learnable bias to query, key, value. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
qk_scale +
+

Override default qk scale of head_dim ** -0.5 if set. Default: None

+
+

+ + TYPE: + float + + + DEFAULT: + None + +

+
drop_rate +
+

Dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
attn_drop_rate +
+

Attention dropout rate. Default: 0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
drop_path_rate +
+

Stochastic depth rate. Default: 0.1

+
+

+ + TYPE: + float + + + DEFAULT: + 0.1 + +

+
norm_layer +
+

Normalization layer. Default: nn.LayerNorm.

+
+

+ + TYPE: + nn.Cell + + + DEFAULT: + nn.LayerNorm + +

+
ape +
+

If True, add absolute position embedding to the patch embedding. Default: False

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
patch_norm +
+

If True, add normalization after patch embedding. Default: True

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
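Note that depths and num_heads default to None, so they must be passed explicitly. The values in the sketch below are the commonly used "tiny" configuration; they are an assumption for illustration, not something this page specifies:

```python
# Sketch: instantiating SwinTransformer with an explicit (assumed, Swin-T-like) configuration.
from mindcv.models.swintransformer import SwinTransformer  # assumed import path

net = SwinTransformer(
    image_size=224,
    patch_size=4,
    embed_dim=96,
    depths=[2, 2, 6, 2],       # assumed values; the None defaults cannot be used as-is
    num_heads=[3, 6, 12, 24],  # assumed values
    window_size=7,
)
```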
+ Source code in mindcv/models/swintransformer.py
class SwinTransformer(nn.Cell):
+    r"""SwinTransformer model class, based on
+    `"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" <https://arxiv.org/pdf/2103.14030>`_
+
+    Args:
+        image_size (int | tuple(int)): Input image size. Default 224
+        patch_size (int | tuple(int)): Patch size. Default: 4
+        in_chans (int): Number of input image channels. Default: 3
+        num_classes (int): Number of classes for classification head. Default: 1000
+        embed_dim (int): Patch embedding dimension. Default: 96
+        depths (tuple(int)): Depth of each Swin Transformer layer.
+        num_heads (tuple(int)): Number of attention heads in different layers.
+        window_size (int): Window size. Default: 7
+        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
+        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
+        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
+        drop_rate (float): Dropout rate. Default: 0
+        attn_drop_rate (float): Attention dropout rate. Default: 0
+        drop_path_rate (float): Stochastic depth rate. Default: 0.1
+        norm_layer (nn.Cell): Normalization layer. Default: nn.LayerNorm.
+        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
+        patch_norm (bool): If True, add normalization after patch embedding. Default: True
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        patch_size: int = 4,
+        in_chans: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: Optional[List[int]] = None,
+        num_heads: Optional[List[int]] = None,
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        qk_scale: Optional[int] = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: Optional[nn.Cell] = nn.LayerNorm,
+        ape: bool = False,
+        patch_norm: bool = True,
+    ) -> None:
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.ape = ape
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        # absolute position embedding
+        if self.ape:
+            self.absolute_pos_embed = Parameter(Tensor(np.zeros((1, num_patches, embed_dim)), dtype=mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
+                               input_resolution=(patches_resolution[0] // (2 ** i_layer),
+                                                 patches_resolution[1] // (2 ** i_layer)),
+                               depth=depths[i_layer],
+                               num_heads=num_heads[i_layer],
+                               window_size=window_size,
+                               mlp_ratio=self.mlp_ratio,
+                               qkv_bias=qkv_bias, qk_scale=qk_scale,
+                               drop=drop_rate, attn_drop=attn_drop_rate,
+                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                               norm_layer=norm_layer,
+                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None)
+            self.layers.append(layer)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-5)
+        self.classifier = nn.Dense(in_channels=self.num_features,
+                                   out_channels=num_classes, has_bias=True) if num_classes > 0 else Identity()
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(init.initializer(init.TruncatedNormal(sigma=0.02),
+                                                      cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer(init.Zero(), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer(init.One(), cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer(init.Zero(), cell.beta.shape, cell.beta.dtype))
+
+    def no_weight_decay(self) -> None:
+        return {"absolute_pos_embed"}
+
+    def no_weight_decay_keywords(self) -> None:
+        return {"relative_position_bias_table"}
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.classifier(x)
+        return x
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        if self.ape:
+            x = x + self.absolute_pos_embed
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = ops.mean(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
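As a quick illustration of the "stochastic depth decay rule" used in the constructor above, the plain-NumPy sketch below lays out the per-block drop-path probabilities, using the depths and drop_path_rate that the swin_tiny factory passes in further down:

import numpy as np

depths = [2, 2, 6, 2]          # swin_tiny depths (see the factory below)
drop_path_rate = 0.2           # swin_tiny drop_path_rate
# One drop-path probability per block, growing linearly from 0 to drop_path_rate.
dpr = [float(x) for x in np.linspace(0, drop_path_rate, sum(depths))]
print([round(p, 3) for p in dpr])
# BasicLayer i receives the slice dpr[sum(depths[:i]):sum(depths[:i + 1])], exactly as in __init__ above.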
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.swintransformer.swin_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get SwinTransformer tiny model. Refer to the base class 'models.SwinTransformer' for more details.

+ +
+ Source code in mindcv/models/swintransformer.py +
(lines 699-714)
@register_model
+def swin_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> SwinTransformer:
+    """Get SwinTransformer tiny model.
+    Refer to the base class 'models.SwinTransformer' for more details.
+    """
+    default_cfg = default_cfgs["swin_tiny"]
+    model = SwinTransformer(image_size=224, patch_size=4, in_chans=in_channels, num_classes=num_classes,
+                            embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7,
+                            mlp_ratio=4., qkv_bias=True, qk_scale=None,
+                            drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2,
+                            norm_layer=nn.LayerNorm, ape=False, patch_norm=True, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
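A minimal usage sketch of the factory above (assuming MindSpore and mindcv are installed; the 224x224 input and the expected output shape follow the defaults documented here):

import numpy as np
import mindspore as ms
from mindcv.models.swintransformer import swin_tiny

# Build the tiny variant without pretrained weights and run a dummy forward pass.
net = swin_tiny(pretrained=False, num_classes=1000, in_channels=3)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
out = net(x)
print(out.shape)  # expected: (1, 1000)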

swintransformerv2

+ + +
+ + + +

+ mindcv.models.swintransformerv2.SwinTransformerV2 + + +

+ + +
+

+ Bases: nn.Cell

+ + +

SwinTransformerV2 model class, based on "Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>

PARAMETER                TYPE       DEFAULT         DESCRIPTION
image_size               int        256             Input image size.
patch_size               int        4               Patch size.
in_channels              int        3               Number of channels of the input.
num_classes              int        1000            Number of classification classes.
embed_dim                int        96              Patch embedding dimension.
depths                   List[int]  [2, 2, 6, 2]    Depth of each Swin Transformer layer.
num_heads                List[int]  [3, 6, 12, 24]  Number of attention heads in different layers.
window_size              int        7               Window size.
mlp_ratio                float      4.0             Ratio of mlp hidden dim to embedding dim.
qkv_bias                 bool       True            If True, add a bias for query, key, value.
drop_rate                float      0.0             Drop probability for the Dropout layer.
attn_drop_rate           float      0.0             Attention drop probability for the Dropout layer.
drop_path_rate           float      0.1             Stochastic depth rate.
norm_layer               nn.Cell    nn.LayerNorm    Normalization layer.
patch_norm               bool       True            If True, add normalization after patch embedding.
pretrained_window_sizes  List[int]  [0, 0, 0, 0]    Pretrained window sizes of each layer.
+
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 521-649)
class SwinTransformerV2(nn.Cell):
+    r"""SwinTransformerV2 model class, based on
+    `"Swin Transformer V2: Scaling Up Capacity and Resolution" <https://arxiv.org/abs/2111.09883>`_
+
+    Args:
+        image_size: Input image size. Default: 256.
+        patch_size: Patch size. Default: 4.
+        in_channels: Number the channels of the input. Default: 3.
+        num_classes: Number of classification classes. Default: 1000.
+        embed_dim: Patch embedding dimension. Default: 96.
+        depths: Depth of each Swin Transformer layer. Default: [2, 2, 6, 2].
+        num_heads: Number of attention heads in different layers. Default: [3, 6, 12, 24].
+        window_size: Window size. Default: 7.
+        mlp_ratio: Ratio of mlp hidden dim to embedding dim. Default: 4.
+        qkv_bias: If True, add a bias for query, key, value. Default: True.
+        drop_rate: Drop probability for the Dropout layer. Default: 0.
+        attn_drop_rate: Attention drop probability for the Dropout layer. Default: 0.
+        drop_path_rate: Stochastic depth rate. Default: 0.1.
+        norm_layer: Normalization layer. Default: nn.LayerNorm.
+        patch_norm: If True, add normalization after patch embedding. Default: True.
+        pretrained_window_sizes: Pretrained window sizes of each layer. Default: [0, 0, 0, 0].
+    """
+
+    def __init__(
+        self,
+        image_size: int = 256,
+        patch_size: int = 4,
+        in_channels: int = 3,
+        num_classes: int = 1000,
+        embed_dim: int = 96,
+        depths: List[int] = [2, 2, 6, 2],
+        num_heads: List[int] = [3, 6, 12, 24],
+        window_size: int = 7,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = True,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        norm_layer: nn.Cell = nn.LayerNorm,
+        patch_norm: bool = True,
+        pretrained_window_sizes: List[int] = [0, 0, 0, 0],
+    ) -> None:
+        super().__init__()
+        self.num_classes = num_classes
+        self.num_layers = len(depths)
+        self.embed_dim = embed_dim
+        self.in_channels = in_channels
+        self.patch_size = patch_size
+        self.patch_norm = patch_norm
+        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
+        self.mlp_ratio = mlp_ratio
+
+        # split image into non-overlapping patches
+        self.patch_embed = PatchEmbed(
+            image_size=image_size, patch_size=patch_size, in_chans=in_channels, embed_dim=embed_dim,
+            norm_layer=norm_layer if self.patch_norm else None)
+        num_patches = self.patch_embed.num_patches
+        self.num_patches = num_patches
+        patches_resolution = self.patch_embed.patches_resolution
+        self.patches_resolution = patches_resolution
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # stochastic depth
+        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+
+        # build layers
+        self.layers = nn.CellList()
+        self.final_seq = num_patches  # downsample seq_length
+        for i_layer in range(self.num_layers):
+            layer = BasicLayer(
+                dim=int(embed_dim * 2**i_layer),
+                input_resolution=(patches_resolution[0] // (2**i_layer),
+                                  patches_resolution[1] // (2**i_layer)),
+                depth=depths[i_layer],
+                num_heads=num_heads[i_layer],
+                window_size=window_size,
+                mlp_ratio=self.mlp_ratio,
+                qkv_bias=qkv_bias,
+                drop=drop_rate, attn_drop=attn_drop_rate,
+                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
+                norm_layer=norm_layer,
+                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
+                pretrained_window_size=pretrained_window_sizes[i_layer]
+            )
+            # downsample seq_length
+            if i_layer < self.num_layers - 1:
+                self.final_seq = self.final_seq // 4
+            self.layers.append(layer)
+        self.head = nn.Dense(self.num_features, self.num_classes)
+
+        self.norm = norm_layer([self.num_features, ], epsilon=1e-6)
+        self.avgpool = ops.ReduceMean(keep_dims=False)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(init.initializer(init.HeUniform(), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.gamma.set_data(init.initializer("ones", cell.gamma.shape, cell.gamma.dtype))
+                cell.beta.set_data(init.initializer("zeros", cell.beta.shape, cell.beta.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.TruncatedNormal(sigma=0.02), cell.weight.shape, cell.weight.dtype)
+                )
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.patch_embed(x)
+        x = self.pos_drop(x)
+        for layer in self.layers:
+            x = layer(x)
+        x = self.norm(x)  # B L C
+        x = self.avgpool(ops.transpose(x, (0, 2, 1)), 2)  # B C 1
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.head(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
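As the constructor above shows, the classification head operates on num_features = embed_dim * 2 ** (num_layers - 1) channels; with the default embed_dim=96 and four stages this works out as follows:

# Defaults from the signature above: embed_dim=96, depths=[2, 2, 6, 2].
embed_dim = 96
depths = [2, 2, 6, 2]
num_layers = len(depths)
num_features = int(embed_dim * 2 ** (num_layers - 1))
print(num_features)  # 768 -> input width of the final nn.Dense head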
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_base_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 717-727)
@register_model
+def swinv2_base_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_base_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 704-714)
@register_model
+def swinv2_base_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_base_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=128, depths=[2, 2, 18, 2],
+                              num_heads=[4, 8, 16, 32], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 691-701)
@register_model
+def swinv2_small_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_small_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 678-688)
@register_model
+def swinv2_small_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_small_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 18, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 665-675)
@register_model
+def swinv2_tiny_window16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window16"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=16, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.swintransformerv2.swinv2_tiny_window8(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +
+ Source code in mindcv/models/swintransformerv2.py +
(lines 652-662)
@register_model
+def swinv2_tiny_window8(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    default_cfg = default_cfgs["swinv2_tiny_window8"]
+    model = SwinTransformerV2(in_channels=in_channels, num_classes=num_classes,
+                              window_size=8, embed_dim=96, depths=[2, 2, 6, 2],
+                              num_heads=[3, 6, 12, 24], **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
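The six registered SwinV2 factories above differ only in window_size, embed_dim, depths and num_heads. A minimal forward-pass sketch with one of them (assuming MindSpore and mindcv are installed; SwinTransformerV2 defaults to a 256x256 input):

import numpy as np
import mindspore as ms
from mindcv.models.swintransformerv2 import swinv2_tiny_window8

net = swinv2_tiny_window8(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 256, 256]), ms.float32)  # default image_size is 256
out = net(x)
print(out.shape)  # expected: (1, 1000)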

vgg

+ + +
+ + + +

+ mindcv.models.vgg.VGG + + +

+ + +
+

+ Bases: nn.Cell

+ + +

VGGNet model class, based on "Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>

PARAMETER    TYPE   DEFAULT   DESCRIPTION
model_name   str    required  Name of the architecture: 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
batch_norm   bool   False     Use batch normalization or not.
num_classes  int    1000      Number of classification classes.
in_channels  int    3         Number of channels of the input.
drop_rate    float  0.5       Dropout rate of the classifier.
+
+ +
+ Source code in mindcv/models/vgg.py +
(lines 72-135)
class VGG(nn.Cell):
+    r"""VGGNet model class, based on
+    `"Very Deep Convolutional Networks for Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+    Args:
+        model_name: name of the architecture. 'vgg11', 'vgg13', 'vgg16' or 'vgg19'.
+        batch_norm: use batch normalization or not. Default: False.
+        num_classes: number of classification classes. Default: 1000.
+        in_channels: number the channels of the input. Default: 3.
+        drop_rate: dropout rate of the classifier. Default: 0.5.
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        batch_norm: bool = False,
+        num_classes: int = 1000,
+        in_channels: int = 3,
+        drop_rate: float = 0.5,
+    ) -> None:
+        super().__init__()
+        cfg = cfgs[model_name]
+        self.features = _make_layers(cfg, batch_norm=batch_norm, in_channels=in_channels)
+        self.flatten = nn.Flatten()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(512 * 7 * 7, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, 4096),
+            nn.ReLU(),
+            Dropout(p=drop_rate),
+            nn.Dense(4096, num_classes),
+        ])
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        """Initialize weights for cells."""
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Conv2d):
+                cell.weight.set_data(
+                    init.initializer(init.HeNormal(math.sqrt(5), mode="fan_out", nonlinearity="relu"),
+                                     cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(
+                        init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.Dense):
+                cell.weight.set_data(
+                    init.initializer(init.Normal(0.01), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(init.initializer("zeros", cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.features(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        x = self.flatten(x)
+        x = self.classifier(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
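The classifier above is hard-wired to 512 * 7 * 7 input features, which matches the standard 224x224 input: the five max-pooling stages of the VGG configurations halve the spatial size five times. A quick check of that arithmetic:

image_size = 224
for _ in range(5):              # the five max-pooling stages in the VGG configurations
    image_size //= 2
print(image_size)               # 7
print(512 * image_size ** 2)    # 25088 -> matches nn.Dense(512 * 7 * 7, 4096) above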
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg11(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 11 layers VGG model. Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 138-149)
@register_model
+def vgg11(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 11 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg11"]
+    model = VGG(model_name="vgg11", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg13(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 13 layers VGG model. Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 152-163)
@register_model
+def vgg13(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 13 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg13"]
+    model = VGG(model_name="vgg13", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg16(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 16 layers VGG model. Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 166-177)
@register_model
+def vgg16(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 16 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg16"]
+    model = VGG(model_name="vgg16", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.vgg.vgg19(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get 19 layers VGG model. Refer to the base class models.VGG for more details.

+ +
+ Source code in mindcv/models/vgg.py +
(lines 180-191)
@register_model
+def vgg19(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VGG:
+    """Get 19 layers VGG model.
+    Refer to the base class `models.VGG` for more details.
+    """
+    default_cfg = default_cfgs["vgg19"]
+    model = VGG(model_name="vgg19", num_classes=num_classes, in_channels=in_channels, **kwargs)
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
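All four factories forward extra keyword arguments to the VGG constructor shown above, so the batch-normalized variants can be requested through batch_norm. A usage sketch (pretrained is left False here, since whether checkpoints exist for the BN variants is not shown on this page):

from mindcv.models.vgg import vgg16

# Extra keyword arguments are forwarded to the VGG constructor above,
# so batch_norm=True builds the batch-normalized VGG16 variant.
net = vgg16(pretrained=False, num_classes=1000, batch_norm=True)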

visformer

+ + +
+ + + +

+ mindcv.models.visformer.Visformer + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Visformer model class, based on "Visformer: The Vision-friendly Transformer" <https://arxiv.org/pdf/2104.12533.pdf>

PARAMETER       TYPE       DEFAULT  DESCRIPTION
img_size        int        224      Input image size.
init_channels   int        32       Number of channels produced by the stem.
num_classes     int        1000     Number of classification classes.
embed_dim       int        384      Embedding dimension in all heads.
depth           List[int]  None     Model block depth of each stage.
num_heads       List[int]  None     Number of heads in each stage.
mlp_ratio       float      4.0      Ratio of hidden features in Mlp.
qkv_bias        bool       False    Have bias in qkv layers or not.
qk_scale        float      None     Override default qk scale of head_dim ** -0.5 if set.
drop_rate       float      0.0      Dropout rate.
attn_drop_rate  float      0.0      Attention layers dropout rate.
drop_path_rate  float      0.1      Drop path rate.
attn_stage      str        '1111'   A stage has attention blocks if its flag is '1', otherwise not.
pos_embed       bool       True     Use position embedding or not.
spatial_conv    str        '1111'   A stage has spatial convolution blocks if its flag is '1', otherwise not.
group           int        8        Convolution group.
pool            bool       True     If True, use global pooling for the head.
conv_init       bool       False    If True, initialize convolution weights with HeNormal; otherwise TruncatedNormal.
+
+ +
+ Source code in mindcv/models/visformer.py +
(lines 210-436)
class Visformer(nn.Cell):
+    r"""Visformer model class, based on
+    '"Visformer: The Vision-friendly Transformer"
+    <https://arxiv.org/pdf/2104.12533.pdf>'
+
+    Args:
+        img_size (int) : input image size. Default: 224.
+        init_channels (int) : number of channels produced by the stem. Default: 32.
+        num_classes (int) : number of classification classes. Default: 1000.
+        embed_dim (int) : embedding dimension in all head. Default: 384.
+        depth (int) : model block depth. Default: None.
+        num_heads (int) : number of heads. Default: None.
+        mlp_ratio (float) : ratio of hidden features in Mlp. Default: 4.
+        qkv_bias (bool) : have bias in qkv layers or not. Default: False.
+        qk_scale (float) : Override default qk scale of head_dim ** -0.5 if set.
+        drop_rate (float) : dropout rate. Default: 0.
+        attn_drop_rate (float) : attention layers dropout rate. Default: 0.
+        drop_path_rate (float) : drop path rate. Default: 0.1.
+        attn_stage (str) : block will have a attention layer if value = '1' else not. Default: '1111'.
+        pos_embed (bool) : position embedding. Default: True.
+        spatial_conv (str) : block will have a spatial convolution layer if value = '1' else not. Default: '1111'.
+        group (int) : convolution group. Default: 8.
+        pool (bool) : if true will use global_pooling else not. Default: True.
+        conv_init : if true will init convolution weights else not. Default: False.
+    """
+
+    def __init__(
+        self,
+        img_size: int = 224,
+        init_channels: int = 32,
+        num_classes: int = 1000,
+        embed_dim: int = 384,
+        depth: List[int] = None,
+        num_heads: List[int] = None,
+        mlp_ratio: float = 4.0,
+        qkv_bias: bool = False,
+        qk_scale: float = None,
+        drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        drop_path_rate: float = 0.1,
+        attn_stage: str = "1111",
+        pos_embed: bool = True,
+        spatial_conv: str = "1111",
+        group: int = 8,
+        pool: bool = True,
+        conv_init: bool = False,
+    ) -> None:
+        super(Visformer, self).__init__()
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        self.init_channels = init_channels
+        self.img_size = img_size
+        self.pool = pool
+        self.conv_init = conv_init
+        self.depth = depth
+        assert (isinstance(depth, list) or isinstance(depth, tuple)) and len(depth) == 4
+        if not (isinstance(num_heads, list) or isinstance(num_heads, tuple)):
+            num_heads = [num_heads] * 4
+
+        self.pos_embed = pos_embed
+        dpr = np.linspace(0, drop_path_rate, sum(depth)).tolist()
+
+        self.stem = nn.SequentialCell([
+            nn.Conv2d(3, self.init_channels, 7, 2, pad_mode="pad", padding=3),
+            nn.BatchNorm2d(self.init_channels),
+            nn.ReLU()
+        ])
+        img_size //= 2
+
+        self.pos_drop = Dropout(p=drop_rate)
+        # stage0
+        if depth[0]:
+            self.patch_embed0 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 4)
+            img_size //= 2
+            if self.pos_embed:
+                self.pos_embed0 = mindspore.Parameter(
+                    ops.zeros((1, embed_dim // 4, img_size, img_size), mindspore.float32))
+            self.stage0 = nn.CellList([
+                Block(dim=embed_dim // 4, num_heads=num_heads[0], head_dim_ratio=0.25, mlp_ratio=mlp_ratio,
+                      qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                      group=group, attn_disabled=(attn_stage[0] == "0"), spatial_conv=(spatial_conv[0] == "1"))
+                for i in range(depth[0])
+            ])
+
+        # stage1
+        if depth[0]:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 4,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 2
+        else:
+            self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=4, in_chans=self.init_channels,
+                                           embed_dim=embed_dim // 2)
+            img_size //= 4
+
+        if self.pos_embed:
+            self.pos_embed1 = mindspore.Parameter(ops.zeros((1, embed_dim // 2, img_size, img_size), mindspore.float32))
+
+        self.stage1 = nn.CellList([
+            Block(
+                dim=embed_dim // 2, num_heads=num_heads[1], head_dim_ratio=0.5, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[1] == "0"), spatial_conv=(spatial_conv[1] == "1")
+            )
+            for i in range(sum(depth[:1]), sum(depth[:2]))
+        ])
+
+        # stage2
+        self.patch_embed2 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim // 2, embed_dim=embed_dim)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed2 = mindspore.Parameter(ops.zeros((1, embed_dim, img_size, img_size), mindspore.float32))
+        self.stage2 = nn.CellList([
+            Block(
+                dim=embed_dim, num_heads=num_heads[2], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[2] == "0"), spatial_conv=(spatial_conv[2] == "1")
+            )
+            for i in range(sum(depth[:2]), sum(depth[:3]))
+        ])
+
+        # stage3
+        self.patch_embed3 = PatchEmbed(img_size=img_size, patch_size=2, in_chans=embed_dim, embed_dim=embed_dim * 2)
+        img_size //= 2
+        if self.pos_embed:
+            self.pos_embed3 = mindspore.Parameter(ops.zeros((1, embed_dim * 2, img_size, img_size), mindspore.float32))
+        self.stage3 = nn.CellList([
+            Block(
+                dim=embed_dim * 2, num_heads=num_heads[3], head_dim_ratio=1.0, mlp_ratio=mlp_ratio,
+                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                group=group, attn_disabled=(attn_stage[3] == "0"), spatial_conv=(spatial_conv[3] == "1")
+            )
+            for i in range(sum(depth[:3]), sum(depth[:4]))
+        ])
+
+        # head
+        if self.pool:
+            self.global_pooling = GlobalAvgPooling()
+
+        self.norm = nn.BatchNorm2d(embed_dim * 2)
+        self.head = nn.Dense(embed_dim * 2, num_classes)
+
+        # weight init
+        if self.pos_embed:
+            if depth[0]:
+                self.pos_embed0.set_data(initializer(TruncatedNormal(0.02),
+                                                     self.pos_embed0.shape, self.pos_embed0.dtype))
+            self.pos_embed1.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed1.shape, self.pos_embed1.dtype))
+            self.pos_embed2.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed2.shape, self.pos_embed2.dtype))
+            self.pos_embed3.set_data(initializer(TruncatedNormal(0.02),
+                                                 self.pos_embed3.shape, self.pos_embed3.dtype))
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for _, cell in self.cells_and_names():
+            if isinstance(cell, nn.Dense):
+                cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+            elif isinstance(cell, nn.LayerNorm):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.BatchNorm2d):
+                cell.beta.set_data(initializer(Constant(0), cell.beta.shape, cell.beta.dtype))
+                cell.gamma.set_data(initializer(Constant(1), cell.gamma.shape, cell.gamma.dtype))
+            elif isinstance(cell, nn.Conv2d):
+                if self.conv_init:
+                    cell.weight.set_data(initializer(HeNormal(mode="fan_out", nonlinearity="relu"), cell.weight.shape,
+                                                     cell.weight.dtype))
+                else:
+                    cell.weight.set_data(initializer(TruncatedNormal(0.02), cell.weight.shape, cell.weight.dtype))
+                if cell.bias is not None:
+                    cell.bias.set_data(initializer(Constant(0), cell.bias.shape, cell.bias.dtype))
+
+    def forward_features(self, x: Tensor) -> Tensor:
+        x = self.stem(x)
+
+        # stage 0
+        if self.depth[0]:
+            x = self.patch_embed0(x)
+            if self.pos_embed:
+                x = x + self.pos_embed0
+                x = self.pos_drop(x)
+            for b in self.stage0:
+                x = b(x)
+
+        # stage 1
+        x = self.patch_embed1(x)
+        if self.pos_embed:
+            x = x + self.pos_embed1
+            x = self.pos_drop(x)
+        for b in self.stage1:
+            x = b(x)
+
+        # stage 2
+        x = self.patch_embed2(x)
+        if self.pos_embed:
+            x = x + self.pos_embed2
+            x = self.pos_drop(x)
+        for b in self.stage2:
+            x = b(x)
+
+        # stage 3
+        x = self.patch_embed3(x)
+        if self.pos_embed:
+            x = x + self.pos_embed3
+            x = self.pos_drop(x)
+        for b in self.stage3:
+            x = b(x)
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, x: Tensor) -> Tensor:
+        # head
+        if self.pool:
+            x = self.global_pooling(x)
+        else:
+            x = x[:, :, 0, 0]
+        x = self.head(x.view(x.shape[0], -1))
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_small(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer small model. Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 468-479)
@register_model
+def visformer_small(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=384,
+                      depth=[0, 7, 4, 4], num_heads=[6, 6, 6, 6], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_small_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer small2 model. Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 482-493)
@register_model
+def visformer_small_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer small2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_small_v2"]
+    model = Visformer(img_size=224, init_channels=32, num_classes=num_classes, embed_dim=256,
+                      depth=[1, 10, 14, 3], num_heads=[2, 4, 8, 16], mlp_ratio=4., qk_scale=-0.5,
+                      group=8, attn_stage="0011", spatial_conv="1100", conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_tiny(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer tiny model. Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 439-451)
@register_model
+def visformer_tiny(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny"]
+    model = Visformer(img_size=224, init_channels=16, num_classes=num_classes, embed_dim=192,
+                      depth=[0, 7, 4, 4], num_heads=[3, 3, 3, 3], mlp_ratio=4., group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
+ + +
+ + + +

+mindcv.models.visformer.visformer_tiny_v2(pretrained=False, num_classes=1000, in_channels=3, **kwargs) + +

+ + +
+ +

Get visformer tiny2 model. Refer to the base class 'models.visformer' for more details.

+ +
+ Source code in mindcv/models/visformer.py +
(lines 454-465)
@register_model
+def visformer_tiny_v2(pretrained: bool = False, num_classes: int = 1000, in_channels: int = 3, **kwargs):
+    """Get visformer tiny2 model.
+    Refer to the base class 'models.visformer' for more details.
+    """
+    default_cfg = default_cfgs["visformer_tiny_v2"]
+    model = Visformer(img_size=224, init_channels=24, num_classes=num_classes, embed_dim=192,
+                      depth=[1, 4, 6, 3], num_heads=[1, 3, 6, 12], mlp_ratio=4., qk_scale=-0.5, group=8,
+                      attn_stage="0011", spatial_conv="1100", drop_path_rate=0.03, conv_init=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
+
+ +
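In the configurations above, attn_stage="0011" and spatial_conv="1100" mean that the first two stages use spatial-convolution blocks and the last two use self-attention blocks, as described in the class docstring. A minimal forward-pass sketch with the tiny variant (assuming MindSpore and mindcv are installed):

import numpy as np
import mindspore as ms
from mindcv.models.visformer import visformer_tiny

net = visformer_tiny(pretrained=False, num_classes=1000)
x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
out = net(x)
print(out.shape)  # expected: (1, 1000)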

vit

+ + +
+ + + +

+ mindcv.models.vit.ViT + + +

+ + +
+

+ Bases: nn.Cell

+ + +

Vision Transformer architecture implementation.

PARAMETER            TYPE     DEFAULT       DESCRIPTION
image_size           int      224           Input image size.
input_channels       int      3             The number of input channels.
patch_size           int      16            Patch size of image.
embed_dim            int      768           The dimension of embedding.
num_layers           int      12            The depth of transformer.
num_heads            int      12            The number of attention heads.
mlp_dim              int      3072          The dimension of MLP hidden layer.
keep_prob            float    1.0           The keep rate, greater than 0 and less than or equal to 1.
attention_keep_prob  float    1.0           The keep rate for attention layer.
drop_path_keep_prob  float    1.0           The keep rate for drop path.
activation           nn.Cell  nn.GELU       Activation function stacked on top of the normalization layer (if not None), otherwise on top of the conv layer.
norm                 nn.Cell  nn.LayerNorm  Norm layer stacked on top of the convolution layer.
pool                 str      'cls'         The method of pooling.
+
+ +
Inputs:
  - x (Tensor): Tensor of shape (N, C_in, H_in, W_in).

Outputs:
  Tensor of shape (N, 768).

Raises:
  ValueError: If `split` is not 'train', 'test' or 'infer'.

Supported Platforms:
  GPU

+
+ + +

Examples:

+
>>> net = ViT()
+>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+>>> output = net(x)
+>>> print(output.shape)
+(1, 768)
+
+

About ViT:

+

Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train.

+

Citation:

+

.. code-block::

+
@article{2020An,
+title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+year={2020},
+}
+
+ +
+ Source code in mindcv/models/vit.py +
(lines 468-605)
class ViT(nn.Cell):
+    """
+    Vision Transformer architecture implementation.
+
+    Args:
+        image_size (int): Input image size. Default: 224.
+        input_channels (int): The number of input channel. Default: 3.
+        patch_size (int): Patch size of image. Default: 16.
+        embed_dim (int): The dimension of embedding. Default: 768.
+        num_layers (int): The depth of transformer. Default: 12.
+        num_heads (int): The number of attention heads. Default: 12.
+        mlp_dim (int): The dimension of MLP hidden layer. Default: 3072.
+        keep_prob (float): The keep rate, greater than 0 and less equal than 1. Default: 1.0.
+        attention_keep_prob (float): The keep rate for attention layer. Default: 1.0.
+        drop_path_keep_prob (float): The keep rate for drop path. Default: 1.0.
+        activation (nn.Cell): Activation function which will be stacked on top of the
+            normalization layer (if not None), otherwise on top of the conv layer. Default: nn.GELU.
+        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution
+            layer. Default: nn.LayerNorm.
+        pool (str): The method of pooling. Default: 'cls'.
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor of shape :math:`(N, 768)`
+
+    Raises:
+        ValueError: If `split` is not 'train', 'test' or 'infer'.
+
+    Supported Platforms:
+        ``GPU``
+
+    Examples:
+        >>> net = ViT()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 768)
+
+    About ViT:
+
+    Vision Transformer (ViT) shows that a pure transformer applied directly to sequences of image
+    patches can perform very well on image classification tasks. When pre-trained on large amounts
+    of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet,
+    CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art
+    convolutional networks while requiring substantially fewer computational resources to train.
+
+    Citation:
+
+    .. code-block::
+
+        @article{2020An,
+        title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+        author={Dosovitskiy, A. and Beyer, L. and Kolesnikov, A. and Weissenborn, D. and Houlsby, N.},
+        year={2020},
+        }
+    """
+
+    def __init__(
+        self,
+        image_size: int = 224,
+        input_channels: int = 3,
+        patch_size: int = 16,
+        embed_dim: int = 768,
+        num_layers: int = 12,
+        num_heads: int = 12,
+        mlp_dim: int = 3072,
+        keep_prob: float = 1.0,
+        attention_keep_prob: float = 1.0,
+        drop_path_keep_prob: float = 1.0,
+        activation: nn.Cell = nn.GELU,
+        norm: Optional[nn.Cell] = nn.LayerNorm,
+        pool: str = "cls",
+    ) -> None:
+        super().__init__()
+
+        self.patch_embedding = PatchEmbedding(image_size=image_size,
+                                              patch_size=patch_size,
+                                              embed_dim=embed_dim,
+                                              input_channels=input_channels)
+        num_patches = self.patch_embedding.num_patches
+
+        if pool == "cls":
+            self.cls_token = init(init_type=Normal(sigma=1.0),
+                                  shape=(1, 1, embed_dim),
+                                  dtype=ms.float32,
+                                  name="cls",
+                                  requires_grad=True)
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches + 1, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.concat = ops.Concat(axis=1)
+        else:
+            self.pos_embedding = init(init_type=Normal(sigma=1.0),
+                                      shape=(1, num_patches, embed_dim),
+                                      dtype=ms.float32,
+                                      name="pos_embedding",
+                                      requires_grad=True)
+            self.mean = ops.ReduceMean(keep_dims=False)
+
+        self.pool = pool
+        self.pos_dropout = Dropout(p=1.0-keep_prob)
+        self.norm = norm((embed_dim,))
+        self.tile = ops.Tile()
+        self.transformer = TransformerEncoder(
+            dim=embed_dim,
+            num_layers=num_layers,
+            num_heads=num_heads,
+            mlp_dim=mlp_dim,
+            keep_prob=keep_prob,
+            attention_keep_prob=attention_keep_prob,
+            drop_path_keep_prob=drop_path_keep_prob,
+            activation=activation,
+            norm=norm,
+        )
+
+    def construct(self, x):
+        """ViT construct."""
+        x = self.patch_embedding(x)
+
+        if self.pool == "cls":
+            cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+            x = self.concat((cls_tokens, x))
+            x += self.pos_embedding
+        else:
+            x += self.pos_embedding
+        x = self.pos_dropout(x)
+        x = self.transformer(x)
+        x = self.norm(x)
+
+        if self.pool == "cls":
+            x = x[:, 0]
+        else:
+            x = self.mean(x, (1, ))  # (1,) or (1,2)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.models.vit.ViT.construct(x) + +

+ + +
+ +

ViT construct.

+ +
+ Source code in mindcv/models/vit.py +
(lines 587-605)
def construct(self, x):
+    """ViT construct."""
+    x = self.patch_embedding(x)
+
+    if self.pool == "cls":
+        cls_tokens = self.tile(self.cls_token, (x.shape[0], 1, 1))
+        x = self.concat((cls_tokens, x))
+        x += self.pos_embedding
+    else:
+        x += self.pos_embedding
+    x = self.pos_dropout(x)
+    x = self.transformer(x)
+    x = self.norm(x)
+
+    if self.pool == "cls":
+        x = x[:, 0]
+    else:
+        x = self.mean(x, (1, ))  # (1,) or (1,2)
+    return x
+
+
+
+ +
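As construct above shows, pool='cls' returns the class-token embedding x[:, 0], while any other value mean-pools over the token dimension; both yield a (1, 768) feature with the defaults. A hedged comparison sketch (assuming MindSpore and mindcv are installed):

import numpy as np
import mindspore as ms
from mindcv.models.vit import ViT

x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
for pool in ("cls", "mean"):     # any value other than "cls" triggers the mean-pooling branch
    net = ViT(pool=pool)
    print(pool, net(x).shape)    # expected: (1, 768) in both cases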
+ + + +
+ +
+ +
+ + +
+ + + +

+mindcv.models.vit.vit_b_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0) + +

+ + +
+ +

Constructs a vit_b_16 architecture from An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>.

PARAMETER       TYPE   DEFAULT  DESCRIPTION
pretrained      bool   False    Whether to download and load the pre-trained model.
num_classes     int    1000     The number of classification classes.
in_channels     int    3        The number of input channels.
image_size      int    224      The input image size (224 for ImageNet).
has_logits      bool   False    Whether has logits or not.
drop_rate       float  0.0      The dropout rate.
drop_path_rate  float  0.0      The stochastic depth rate.
+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + ViT + + +
+

ViT network, MindSpore.nn.Cell

+
+
+ +
+ Inputs +
    +
  • x (Tensor) - Tensor of shape :math:(N, C_{in}, H_{in}, W_{in}).
  • +
+
+ + +

Examples:

>>> net = vit_b_16_224()
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> output = net(x)
>>> print(output.shape)
(1, 1000)

Outputs
  Tensor of shape :math:`(N, CLASSES_{out})`

Supported Platforms
  GPU

+
+
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_b_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """
+    Constructs a vit_b_16 architecture from
+    `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.
+
+    Args:
+        pretrained (bool): Whether to download and load the pre-trained model. Default: False.
+        num_classes (int): The number of classification classes. Default: 1000.
+        in_channels (int): The number of input channels. Default: 3.
+        image_size (int): The input image size. Default: 224 for ImageNet.
+        has_logits (bool): Whether has logits or not. Default: False.
+        drop_rate (float): The dropout rate. Default: 0.0.
+        drop_path_rate (float): The stochastic depth rate. Default: 0.0.
+
+    Returns:
+        ViT network, MindSpore.nn.Cell
+
+    Inputs:
+        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Examples:
+        >>> net = vit_b_16_224()
+        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
+        >>> output = net(x)
+        >>> print(output.shape)
+        (1, 1000)
+
+    Outputs:
+        Tensor of shape :math:`(N, CLASSES_{out})`
+
+    Supported Platforms:
+        ``GPU``
+    """
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_224"]
+
+    return vit(**config)
mindcv.models.vit.vit_b_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network
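
A quick sanity check for this variant: a hedged sketch mirroring the vit_b_16_224 example above. The expected shape assumes the default num_classes=1000, and the other vit_* variants follow the same pattern with their own image_size.

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models.vit import vit_b_16_384
>>> net = vit_b_16_384()
>>> x = ms.Tensor(np.ones([1, 3, 384, 384]), ms.float32)
>>> print(net(x).shape)
(1, 1000)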

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_b_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_16_384"]
+
+    return vit(**config)
mindcv.models.vit.vit_b_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_b_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_224"]
+
+    return vit(**config)
mindcv.models.vit.vit_b_32_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_b_32_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention_dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 768
+    config.mlp_dim = 3072
+    config.num_heads = 12
+    config.num_layers = 12
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention_dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 768 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_b_32_384"]
+
+    return vit(**config)
mindcv.models.vit.vit_l_16_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_l_16_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_224"]
+
+    return vit(**config)
mindcv.models.vit.vit_l_16_384(pretrained=False, num_classes=1000, in_channels=3, image_size=384, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_l_16_384(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 384,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 16
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.pretrained = pretrained
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_16_384"]
+
+    return vit(**config)
mindcv.models.vit.vit_l_32_224(pretrained=False, num_classes=1000, in_channels=3, image_size=224, has_logits=False, drop_rate=0.0, drop_path_rate=0.0)

construct and return a ViT network

+ +
+ Source code in mindcv/models/vit.py +
@register_model
+def vit_l_32_224(
+    pretrained: bool = False,
+    num_classes: int = 1000,
+    in_channels: int = 3,
+    image_size: int = 224,
+    has_logits: bool = False,
+    drop_rate: float = 0.0,
+    # attention-dropout: float = 0.0,
+    drop_path_rate: float = 0.0,
+) -> ViT:
+    """construct and return a ViT network"""
+    config = ConfigDict()
+    config.image_size = image_size
+    config.num_classes = num_classes
+    config.patch_size = 32
+    config.embed_dim = 1024
+    config.mlp_dim = 4096
+    config.num_heads = 16
+    config.num_layers = 24
+    config.dropout = drop_rate
+    config.attention_dropout = drop_rate  # attention-dropout
+    config.drop_path_rate = drop_path_rate
+    config.pretrained = pretrained
+    config.input_channels = in_channels
+    config.pool = "cls"
+    config.representation_size = 1024 if has_logits else None
+
+    config.url_cfg = default_cfgs["vit_l_32_224"]
+
+    return vit(**config)
volo

mindcv.models.volo.VOLO

Bases: nn.Cell

Vision Outlooker, the main class of our model
--layers: [x,x,x,x], four blocks in two stages; the first block is outlooker, the other three are transformer. We set four blocks, which are easily applied to downstream tasks
--img_size, --in_channels, --num_classes: these three are very easy to understand
--patch_size: patch_size in outlook attention
--stem_hidden_dim: hidden dim of patch embedding, d1-d4 is 64, d5 is 128
--embed_dims, --num_heads: embedding dim, number of heads in each block
--downsamples: flags to apply downsampling or not
--outlook_attention: flags to apply outlook attention or not
--mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand
--attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand
--post_layers: post layers like two class attention layers using [ca, ca]; if used, return_mean=False
--return_mean: use the mean of all feature tokens for classification; if yes, no class token
--return_dense: use token labeling, details are here: https://github.com/zihangJiang/TokenLabeling
--mix_token: mixing tokens as in token labeling, details are here: https://github.com/zihangJiang/TokenLabeling
--pooling_scale: pooling_scale=2 means we downsample 2x
--out_kernel, --out_stride, --out_padding: kernel size, stride, and padding for outlook attention
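
To make the argument list above concrete, here is a hedged construction sketch that instantiates VOLO directly with the volo_d1 configuration shown later on this page; the argument values are taken from that source, and the expected output shape assumes the default num_classes=1000.

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models.volo import VOLO
>>> net = VOLO(layers=[4, 4, 8, 2],                        # outlooker block first, then three transformer blocks
...            embed_dims=[192, 384, 384, 384],
...            num_heads=[6, 12, 12, 12],
...            mlp_ratios=[3, 3, 3, 3],
...            downsamples=[True, False, False, False],    # downsample once, between stage 1 and stage 2
...            outlook_attention=[True, False, False, False],
...            post_layers=['ca', 'ca'])                   # two class-attention layers
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> print(net(x).shape)
(1, 1000)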

+ +
+ Source code in mindcv/models/volo.py +
class VOLO(nn.Cell):
+    """
+    Vision Outlooker, the main class of our model
+    --layers: [x,x,x,x], four blocks in two stages, the first block is outlooker, the
+              other three are transformer, we set four blocks, which are easily
+              applied to downstream tasks
+    --img_size, --in_channels, --num_classes: these three are very easy to understand
+    --patch_size: patch_size in outlook attention
+    --stem_hidden_dim: hidden dim of patch embedding, d1-d4 is 64, d5 is 128
+    --embed_dims, --num_heads: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios, --qkv_bias, --qk_scale, --drop_rate: easy to understand
+    --attn_drop_rate, --drop_path_rate, --norm_layer: easy to understand
+    --post_layers: post layers like two class attention layers using [ca, ca],
+                  if yes, return_mean=False
+    --return_mean: use mean of all feature tokens for classification, if yes, no class token
+    --return_dense: use token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --mix_token: mixing tokens as token labeling, details are here:
+                    https://github.com/zihangJiang/TokenLabeling
+    --pooling_scale: pooling_scale=2 means we downsample 2x
+    --out_kernel, --out_stride, --out_padding: kernel size,
+                                               stride, and padding for outlook attention
+    """
+    def __init__(
+        self,
+        layers,
+        img_size=224,
+        in_channels=3,
+        num_classes=1000,
+        patch_size=8,
+        stem_hidden_dim=64,
+        embed_dims=None,
+        num_heads=None,
+        downsamples=None,
+        outlook_attention=None,
+        mlp_ratios=None,
+        qkv_bias=False,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        norm_layer=nn.LayerNorm,
+        post_layers=None,
+        return_mean=False,
+        return_dense=True,
+        mix_token=True,
+        pooling_scale=2,
+        out_kernel=3,
+        out_stride=2,
+        out_padding=1,
+    ) -> None:
+
+        super().__init__()
+        self.num_classes = num_classes
+        self.patch_embed = PatchEmbed(stem_conv=True, stem_stride=2, patch_size=patch_size,
+                                      in_channels=in_channels, hidden_dim=stem_hidden_dim,
+                                      embed_dim=embed_dims[0])
+        # initial positional encoding, we add positional encoding after outlooker blocks
+        self.pos_embed = Parameter(
+            ops.zeros((1, img_size // patch_size // pooling_scale,
+                      img_size // patch_size // pooling_scale,
+                      embed_dims[-1]), mstype.float32))
+
+        self.pos_drop = Dropout(p=drop_rate)
+
+        # set the main block in network
+        network = []
+        for i in range(len(layers)):
+            if outlook_attention[i]:
+                # stage 1
+                stage = outlooker_blocks(Outlooker, i, embed_dims[i], layers,
+                                         downsample=downsamples[i], num_heads=num_heads[i],
+                                         kernel_size=out_kernel, stride=out_stride,
+                                         padding=out_padding, mlp_ratio=mlp_ratios[i],
+                                         qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                         attn_drop=attn_drop_rate, norm_layer=norm_layer)
+                network.append(stage)
+            else:
+                # stage 2
+                stage = transformer_blocks(Transformer, i, embed_dims[i], layers,
+                                           num_heads[i], mlp_ratio=mlp_ratios[i],
+                                           qkv_bias=qkv_bias, qk_scale=qk_scale,
+                                           drop_path_rate=drop_path_rate,
+                                           attn_drop=attn_drop_rate,
+                                           norm_layer=norm_layer)
+                network.append(stage)
+
+            if downsamples[i]:
+                # downsampling between two stages
+                network.append(Downsample(embed_dims[i], embed_dims[i + 1], 2))
+
+        self.network = nn.CellList(network)
+
+        # set post block, for example, class attention layers
+        self.post_network = None
+        if post_layers is not None:
+            self.post_network = nn.CellList([
+                get_block(post_layers[i],
+                          dim=embed_dims[-1],
+                          num_heads=num_heads[-1],
+                          mlp_ratio=mlp_ratios[-1],
+                          qkv_bias=qkv_bias,
+                          qk_scale=qk_scale,
+                          attn_drop=attn_drop_rate,
+                          drop_path=0.0,
+                          norm_layer=norm_layer)
+                for i in range(len(post_layers))
+            ])
+            self.cls_token = Parameter(ops.zeros((1, 1, embed_dims[-1]), mstype.float32))
+            self.cls_token.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.cls_token.data.shape))
+
+        # set output type
+        self.return_mean = return_mean  # if yes, return mean, not use class token
+        self.return_dense = return_dense  # if yes, return class token and all feature tokens
+        if return_dense:
+            assert not return_mean, "cannot return both mean and dense"
+        self.mix_token = mix_token
+        self.pooling_scale = pooling_scale
+        if mix_token:  # enable token mixing, see token labeling for details.
+            self.beta = 1.0
+            assert return_dense, "return all tokens if mix_token is enabled"
+        if return_dense:
+            self.aux_head = nn.Dense(
+                embed_dims[-1],
+                num_classes) if num_classes > 0 else Identity()
+        self.norm = norm_layer([embed_dims[-1]])
+
+        # Classifier head
+        self.head = nn.Dense(
+            embed_dims[-1], num_classes) if num_classes > 0 else Identity()
+
+        self.pos_embed.set_data(init.initializer(init.TruncatedNormal(sigma=.02), self.pos_embed.data.shape))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight.set_data(init.initializer(init.TruncatedNormal(sigma=.02), m.weight.data.shape))
+                if m.bias is not None:
+                    m.bias.set_data(init.initializer(init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.gamma.set_data(init.initializer(init.Constant(1), m.gamma.shape))
+                m.beta.set_data(init.initializer(init.Constant(0), m.beta.shape))
+
+    def forward_embeddings(self, x: Tensor) -> Tensor:
+        # patch embedding
+        x = self.patch_embed(x)
+        # B,C,H,W-> B,H,W,C
+        x = ops.transpose(x, (0, 2, 3, 1))
+        return x
+
+    def forward_tokens(self, x: Tensor) -> Tensor:
+        for idx, block in enumerate(self.network):
+            if idx == 2:  # add positional encoding after outlooker blocks
+                x = x + self.pos_embed
+                x = self.pos_drop(x)
+            x = block(x)
+
+        B, H, W, C = x.shape
+        x = ops.reshape(x, (B, -1, C))
+        return x
+
+    def forward_cls(self, x: Tensor) -> Tensor:
+        # B, N, C = x.shape
+        cls_tokens = ops.broadcast_to(self.cls_token, (x.shape[0], -1, -1))
+        x = ops.Cast()(x, cls_tokens.dtype)
+        x = ops.concat([cls_tokens, x], 1)
+        for block in self.post_network:
+            x = block(x)
+        return x
+
+    def construct(self, x: Tensor) -> Tensor:
+        # step1: patch embedding
+        x = self.forward_embeddings(x)
+
+        # step2: tokens learning in the two stages
+        x = self.forward_tokens(x)
+
+        # step3: post network, apply class attention or not
+        if self.post_network is not None:
+            x = self.forward_cls(x)
+        x = self.norm(x)
+
+        if self.return_mean:  # if no class token, return mean
+            return self.head(ops.mean(x, 1))
+
+        x_cls = self.head(x[:, 0])
+        if not self.return_dense:
+            return x_cls
+
+        return x_cls
+
mindcv.models.volo.volo_d1(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D1 model, Params: 27M
--layers: [x,x,x,x], four blocks in two stages; the first stage (block) is outlooker, the other three blocks are transformer. We set four blocks, which are easily applied to downstream tasks
--embed_dims, --num_heads: embedding dim, number of heads in each block
--downsamples: flags to apply downsampling or not in four blocks
--outlook_attention: flags to apply outlook attention or not
--mlp_ratios: mlp ratio in four blocks
--post_layers: post layers like two class attention layers using [ca, ca]
See details of all args in the class VOLO()
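
In practice the registered factory function is the usual entry point; a hedged usage sketch (extra keyword arguments are forwarded to VOLO):

>>> from mindcv.models.volo import volo_d1
>>> net = volo_d1(pretrained=False, num_classes=10)  # **kwargs such as num_classes are passed through to VOLO
>>> net.num_classes
10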

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d1(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D1 model, Params: 27M
+    --layers: [x,x,x,x], four blocks in two stages, the first stage(block) is outlooker,
+            the other three blocks are transformer, we set four blocks, which are easily
+             applied to downstream tasks
+    --embed_dims, --num_heads,: embedding dim, number of heads in each block
+    --downsamples: flags to apply downsampling or not in four blocks
+    --outlook_attention: flags to apply outlook attention or not
+    --mlp_ratios: mlp ratio in four blocks
+    --post_layers: post layers like two class attention layers using [ca, ca]
+    See detail for all args in the class VOLO()
+    """
+    default_cfg = default_cfgs['volo_d1']
+
+    # first block is outlooker (stage1), the other three are transformer (stage2)
+    model = VOLO(layers=[4, 4, 8, 2],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[192, 384, 384, 384],
+                 num_heads=[6, 12, 12, 12],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
mindcv.models.volo.volo_d2(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D2 model, Params: 59M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d2(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D2 model, Params: 59M
+    """
+    default_cfg = default_cfgs['volo_d2']
+    model = VOLO(layers=[6, 4, 10, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
mindcv.models.volo.volo_d3(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D3 model, Params: 86M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d3(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D3 model, Params: 86M
+    """
+    default_cfg = default_cfgs['volo_d3']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[256, 512, 512, 512],
+                 num_heads=[8, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
mindcv.models.volo.volo_d4(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D4 model, Params: 193M

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d4(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D4 model, Params: 193M
+    """
+    default_cfg = default_cfgs['volo_d4']
+    model = VOLO(layers=[8, 8, 16, 4],
+                 in_channels=in_channels,
+                 num_classes=num_classes,
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[3, 3, 3, 3],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
mindcv.models.volo.volo_d5(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

VOLO-D5 model, Params: 296M. stem_hidden_dim=128: the dim in patch embedding is 128 for VOLO-D5.

+ +
+ Source code in mindcv/models/volo.py +
@register_model
+def volo_d5(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs):
+    """
+    VOLO-D5 model, Params: 296M
+    stem_hidden_dim=128, the dim in patch embedding is 128 for VOLO-D5
+    """
+    default_cfg = default_cfgs['volo_d5']
+    model = VOLO(layers=[12, 12, 20, 4],
+                 embed_dims=[384, 768, 768, 768],
+                 num_heads=[12, 16, 16, 16],
+                 mlp_ratios=[4, 4, 4, 4],
+                 downsamples=[True, False, False, False],
+                 outlook_attention=[True, False, False, False],
+                 post_layers=['ca', 'ca'],
+                 stem_hidden_dim=128,
+                 **kwargs)
+    model.default_cfg = default_cfg
+
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
xcit

mindcv.models.xcit.XCiT

Bases: nn.Cell

XCiT model class, based on `"XCiT: Cross-Covariance Image Transformers" <https://arxiv.org/abs/2106.09681>`_.

PARAMETER        DESCRIPTION
img_size         input image size. TYPE: int or tuple  DEFAULT: 224
patch_size       patch size. TYPE: int or tuple  DEFAULT: 16
in_chans         number of input channels. TYPE: int  DEFAULT: 3
num_classes      number of classes for classification head. TYPE: int  DEFAULT: 1000
embed_dim        embedding dimension. TYPE: int  DEFAULT: 768
depth            depth of transformer. TYPE: int  DEFAULT: 12
num_heads        number of attention heads. TYPE: int  DEFAULT: 12
mlp_ratio        ratio of mlp hidden dim to embedding dim. TYPE: int  DEFAULT: 4.0
qkv_bias         enable bias for qkv if True. TYPE: bool  DEFAULT: True
qk_scale         override the default qk scale of head_dim ** -0.5 if set. TYPE: float  DEFAULT: None
drop_rate        dropout rate. TYPE: float  DEFAULT: 0.0
attn_drop_rate   attention dropout rate. TYPE: float  DEFAULT: 0.0
drop_path_rate   stochastic depth rate. TYPE: float  DEFAULT: 0.0
norm_layer       normalization layer. TYPE: nn.Cell  DEFAULT: None
cls_attn_layers  depth of class attention layers. TYPE: int  DEFAULT: 2
use_pos          whether to use positional encoding. TYPE: bool  DEFAULT: True
eta              layerscale initialization value. TYPE: float  DEFAULT: None
tokens_norm      whether to normalize all tokens or just the cls_token in the CA. TYPE: bool  DEFAULT: False
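
A hedged construction sketch that exercises the arguments above; the values mirror the xcit_tiny_12_p16_224 configuration shown further below.

>>> from functools import partial
>>> import mindspore.nn as nn
>>> from mindcv.models.xcit import XCiT
>>> net = XCiT(patch_size=16, embed_dim=192, depth=12, num_heads=4, mlp_ratio=4,
...            qkv_bias=True, norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
...            eta=1.0, tokens_norm=True)  # eta sets the layerscale init value; tokens_norm normalizes all tokens in the CA blocks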
+ +
+ Source code in mindcv/models/xcit.py +
class XCiT(nn.Cell):
+    r"""XCiT model class, based on
+    `"XCiT: Cross-Covariance Image Transformers" <https://arxiv.org/abs/2106.09681>`_
+    Args:
+        img_size (int, tuple): input image size
+        patch_size (int, tuple): patch size
+        in_chans (int): number of input channels
+        num_classes (int): number of classes for classification head
+        embed_dim (int): embedding dimension
+        depth (int): depth of transformer
+        num_heads (int): number of attention heads
+        mlp_ratio (int): ratio of mlp hidden dim to embedding dim
+        qkv_bias (bool): enable bias for qkv if True
+        qk_scale (float): override default qk scale of head_dim ** -0.5 if set
+        drop_rate (float): dropout rate
+        attn_drop_rate (float): attention dropout rate
+        drop_path_rate (float): stochastic depth rate
+        norm_layer: (nn.Module): normalization layer
+        cls_attn_layers: (int) Depth of Class attention layers
+        use_pos: (bool) whether to use positional encoding
+        eta: (float) layerscale initialization value
+        tokens_norm: (bool) Whether to normalize all tokens or just the cls_token in the CA
+    """
+
+    def __init__(self,
+                 img_size: int = 224,
+                 patch_size: int = 16,
+                 in_chans: int = 3,
+                 num_classes: int = 1000,
+                 embed_dim: int = 768,
+                 depth: int = 12,
+                 num_heads: int = 12,
+                 mlp_ratio: int = 4.,
+                 qkv_bias: bool = True,
+                 qk_scale: float = None,
+                 drop_rate: float = 0.,
+                 attn_drop_rate: float = 0.,
+                 drop_path_rate: float = 0.,
+                 norm_layer: nn.Cell = None,
+                 cls_attn_layers: int = 2,
+                 use_pos: bool = True,
+                 patch_proj: str = 'linear',
+                 eta: float = None,
+                 tokens_norm: bool = False):
+        super().__init__()
+
+        self.num_classes = num_classes
+        self.num_features = self.embed_dim = embed_dim
+        norm_layer = norm_layer or partial(nn.LayerNorm, epsilon=1e-6)
+
+        self.patch_embed = ConvPatchEmbed(img_size=img_size, embed_dim=embed_dim,
+                                          patch_size=patch_size)
+
+        num_patches = self.patch_embed.num_patches
+
+        self.cls_token = Parameter(
+            ops.zeros((1, 1, embed_dim), mstype.float32))
+        self.pos_drop = Dropout(p=drop_rate)
+
+        dpr = [drop_path_rate for i in range(depth)]
+        self.blocks = nn.CellList([
+            XCABlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
+                norm_layer=norm_layer, num_tokens=num_patches, eta=eta)
+            for i in range(depth)])
+
+        self.cls_attn_blocks = nn.CellList([
+            ClassAttentionBlock(
+                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
+                qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer,
+                eta=eta, tokens_norm=tokens_norm)
+            for i in range(cls_attn_layers)])
+        self.norm = norm_layer([embed_dim])
+        self.head = nn.Dense(
+            in_channels=embed_dim, out_channels=num_classes) if num_classes > 0 else ops.Identity()
+
+        self.pos_embeder = PositionalEncodingFourier(dim=embed_dim)
+        self.use_pos = use_pos
+
+        # Classifier head
+        self.cls_token.set_data(weight_init.initializer(weight_init.TruncatedNormal(sigma=0.02),
+                                                        self.cls_token.shape,
+                                                        self.cls_token.dtype))
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for name, m in self.cells_and_names():
+            if isinstance(m, nn.Dense):
+                m.weight = weight_init.initializer(weight_init.TruncatedNormal(
+                    sigma=0.02), m.weight.shape, mindspore.float32)
+                if m.bias is not None:
+                    m.bias.set_data(weight_init.initializer(
+                        weight_init.Constant(0), m.bias.shape))
+            elif isinstance(m, nn.LayerNorm):
+                m.beta.set_data(weight_init.initializer(
+                    weight_init.Constant(0), m.beta.shape))
+                m.gamma.set_data(weight_init.initializer(
+                    weight_init.Constant(1), m.gamma.shape))
+
+    def forward_features(self, x):
+        B, C, H, W = x.shape
+        x, (Hp, Wp) = self.patch_embed(x)
+        if self.use_pos:
+            pos_encoding = self.pos_embeder(B, Hp, Wp).reshape(
+                B, -1, x.shape[1]).transpose(0, 2, 1)
+            x = x + pos_encoding
+        x = self.pos_drop(x)
+        for blk in self.blocks:
+            x = blk(x, Hp, Wp)
+        cls_tokens = ops.broadcast_to(self.cls_token, (B, -1, -1))
+        cls_tokens = ops.cast(cls_tokens, x.dtype)
+        x = ops.concat((cls_tokens, x), 1)
+
+        for blk in self.cls_attn_blocks:
+            x = blk(x, Hp, Wp)
+        return self.norm(x)[:, 0]
+
+    def construct(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+
mindcv.models.xcit.xcit_tiny_12_p16_224(pretrained=False, num_classes=1000, in_channels=3, **kwargs)

Get xcit_tiny_12_p16_224 model. Refer to the base class 'models.XCiT' for more details.
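
A minimal usage sketch (the expected shape assumes the default num_classes=1000):

>>> import numpy as np
>>> import mindspore as ms
>>> from mindcv.models.xcit import xcit_tiny_12_p16_224
>>> net = xcit_tiny_12_p16_224(pretrained=False)
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> print(net(x).shape)
(1, 1000)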

+ +
+ Source code in mindcv/models/xcit.py +
@register_model
+def xcit_tiny_12_p16_224(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> XCiT:
+    """Get xcit_tiny_12_p16_224 model.
+    Refer to the base class 'models.XCiT' for more details.
+    """
+    default_cfg = default_cfgs['xcit_tiny_12_p16_224']
+    model = XCiT(
+        patch_size=16, num_classes=num_classes, embed_dim=192, depth=12, num_heads=4, mlp_ratio=4, qkv_bias=True,
+        norm_layer=partial(nn.LayerNorm, epsilon=1e-6), eta=1.0, tokens_norm=True, **kwargs)
+    if pretrained:
+        load_pretrained(model, default_cfg,
+                        num_classes=num_classes, in_channels=in_channels)
+
+    return model
+
+
+
+ +
\ No newline at end of file
diff --git a/zh/reference/optim/index.html b/zh/reference/optim/index.html
new file mode 100644
index 000000000..d178f0e2e
--- /dev/null
+++ b/zh/reference/optim/index.html
@@ -0,0 +1,2587 @@

Optimizer

+

Optimizer Factory

mindcv.optim.optim_factory.create_optimizer(params, opt='adam', lr=0.001, weight_decay=0, momentum=0.9, nesterov=False, filter_bias_and_bn=True, loss_scale=1.0, schedule_decay=0.004, checkpoint_path='', eps=1e-10, **kwargs)

Creates optimizer by name.

PARAMETER            DESCRIPTION
params               Network parameters. Union[list[Parameter], list[dict]]: must be a list of Parameter or a list
                     of dicts. When a list element is a dictionary, its keys can be "params", "lr", "weight_decay",
                     "grad_centralization" and "order_params".
opt                  Wrapped optimizer. Choose from 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion',
                     'rmsprop', 'adagrad' and 'lamb'. 'adam' is the default choice for convolution-based networks;
                     'adamw' is recommended for ViT-based networks. TYPE: str  DEFAULT: 'adam'
lr                   Learning rate: float or lr scheduler. Fixed and dynamic learning rates are supported.
                     TYPE: Optional[float]  DEFAULT: 0.001
weight_decay         Weight decay factor. Note that weight decay can be a constant value or a Cell. It is a Cell
                     only when dynamic weight decay is applied: similar to a dynamic learning rate, users customize
                     a weight decay schedule with the global step as its only input, and during training the
                     optimizer calls that WeightDecaySchedule instance to get the weight decay value of the current
                     step. TYPE: float  DEFAULT: 0
momentum             Momentum, if the optimizer supports it. TYPE: float  DEFAULT: 0.9
nesterov             Whether to use the Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
                     TYPE: bool  DEFAULT: False
filter_bias_and_bn   Whether to filter batch norm parameters and bias from weight decay. If True, weight decay is
                     not applied to BN parameters or to the bias of Conv and Dense layers. TYPE: bool  DEFAULT: True
loss_scale           A floating point value for the loss scale, which must be larger than 0.0. TYPE: float  DEFAULT: 1.0

RETURNS              DESCRIPTION
                     Optimizer object
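
A hedged usage sketch; the nn.Dense toy network is only for illustration, and any Cell's trainable_params() can be passed in the same way.

>>> import mindspore.nn as nn
>>> from mindcv.optim.optim_factory import create_optimizer
>>> net = nn.Dense(10, 2)  # toy network, illustration only
>>> # AdamW with weight decay; filter_bias_and_bn=True (the default) keeps decay off bias and BN parameters
>>> opt = create_optimizer(net.trainable_params(), opt="adamw", lr=1e-3, weight_decay=0.05)
>>> # the returned optimizer plugs into the usual MindSpore wrappers, e.g. nn.TrainOneStepCell(net_with_loss, opt)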
+ +
+ Source code in mindcv/optim/optim_factory.py +
def create_optimizer(
+    params,
+    opt: str = "adam",
+    lr: Optional[float] = 1e-3,
+    weight_decay: float = 0,
+    momentum: float = 0.9,
+    nesterov: bool = False,
+    filter_bias_and_bn: bool = True,
+    loss_scale: float = 1.0,
+    schedule_decay: float = 4e-3,
+    checkpoint_path: str = "",
+    eps: float = 1e-10,
+    **kwargs,
+):
+    r"""Creates optimizer by name.
+
+    Args:
+        params: network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters
+            or list of dicts. When the list element is a dictionary, the key of the dictionary can be
+            "params", "lr", "weight_decay","grad_centralization" and "order_params".
+        opt: wrapped optimizer. You could choose like 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion',
+            'rmsprop', 'adagrad', 'lamb'. 'adam' is the default choice for convolution-based networks.
+            'adamw' is recommended for ViT-based networks. Default: 'adam'.
+        lr: learning rate: float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3.
+        weight_decay: weight decay factor. It should be noted that weight decay can be a constant value or a Cell.
+            It is a Cell only when dynamic weight decay is applied. Dynamic weight decay is similar to
+            dynamic learning rate, users need to customize a weight decay schedule only with global step as input,
+            and during training, the optimizer calls the instance of WeightDecaySchedule to get the weight decay value
+            of current step. Default: 0.
+        momentum: momentum if the optimizer supports. Default: 0.9.
+        nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False.
+        filter_bias_and_bn: whether to filter batch norm parameters and bias from weight decay.
+            If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True.
+        loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0.
+
+    Returns:
+        Optimizer object
+    """
+
+    opt = opt.lower()
+
+    if weight_decay and filter_bias_and_bn:
+        params = init_group_params(params, weight_decay)
+
+    opt_args = dict(**kwargs)
+    # if lr is not None:
+    #    opt_args.setdefault('lr', lr)
+
+    # non-adaptive: SGD, momentum, and nesterov
+    if opt == "sgd":
+        # note: nn.Momentum may perform better if momentum > 0.
+        optimizer = nn.SGD(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            nesterov=nesterov,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt in ["momentum", "nesterov"]:
+        optimizer = nn.Momentum(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            use_nesterov=nesterov,
+            loss_scale=loss_scale,
+        )
+    # adaptive
+    elif opt == "adam":
+        optimizer = nn.Adam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            use_nesterov=nesterov,
+            **opt_args,
+        )
+    elif opt == "adamw":
+        optimizer = AdamW(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lion":
+        optimizer = Lion(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "nadam":
+        optimizer = NAdam(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            schedule_decay=schedule_decay,
+            **opt_args,
+        )
+    elif opt == "adan":
+        optimizer = Adan(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "rmsprop":
+        optimizer = nn.RMSProp(
+            params=params,
+            learning_rate=lr,
+            momentum=momentum,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            epsilon=eps,
+            **opt_args,
+        )
+    elif opt == "adagrad":
+        optimizer = nn.Adagrad(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            loss_scale=loss_scale,
+            **opt_args,
+        )
+    elif opt == "lamb":
+        assert loss_scale == 1.0, "Loss scaler is not supported by Lamb optimizer"
+        optimizer = nn.Lamb(
+            params=params,
+            learning_rate=lr,
+            weight_decay=weight_decay,
+            **opt_args,
+        )
+    else:
+        raise ValueError(f"Invalid optimizer: {opt}")
+
+    if os.path.exists(checkpoint_path):
+        param_dict = load_checkpoint(checkpoint_path)
+        load_param_into_net(optimizer, param_dict)
+
+    return optimizer
+
+
+
+ +

AdamW

mindcv.optim.adamw.AdamW

Bases: Optimizer

Implements the AdamWeightDecay optimizer with optional gradient clipping by norm.
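
A hedged instantiation sketch using only the constructor arguments visible in the source below (the toy network is illustrative):

>>> import mindspore.nn as nn
>>> from mindcv.optim.adamw import AdamW
>>> net = nn.Dense(16, 4)  # toy network
>>> opt = AdamW(net.trainable_params(), learning_rate=1e-3,
...             weight_decay=0.05, clip=True)  # clip=True applies global-norm clipping (threshold 5.0)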

+ +
+ Source code in mindcv/optim/adamw.py +
class AdamW(Optimizer):
+    """
+    Implements the AdamWeightDecay optimizer with optional gradient clipping by norm.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="adam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="adam_v", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    self.moments2,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(
+                    _adam_opt, beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr, self.weight_decay
+                ),
+                self.parameters,
+                self.moments1,
+                self.moments2,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +

Adan

mindcv.optim.adan.Adan

Bases: Optimizer

The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677

+

Note: it is an experimental version.
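
A hedged instantiation sketch (keeping in mind the experimental status noted above):

>>> import mindspore.nn as nn
>>> from mindcv.optim.adan import Adan
>>> net = nn.Dense(16, 4)  # toy network
>>> opt = Adan(net.trainable_params(), learning_rate=1e-3, weight_decay=0.02)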

+ +
+ Source code in mindcv/optim/adan.py +
class Adan(Optimizer):
+    """
+    The Adan (ADAptive Nesterov momentum algorithm) Optimizer from https://arxiv.org/abs/2208.06677
+
+    Note: it is an experimental version.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=1e-3,
+        beta1=0.98,
+        beta2=0.92,
+        beta3=0.99,
+        eps=1e-8,
+        use_locking=False,
+        weight_decay=0.0,
+        loss_scale=1.0,
+    ):
+        super().__init__(
+            learning_rate, params, weight_decay=weight_decay, loss_scale=loss_scale
+        )  # The optimizer's inherited weight decay handling is bypassed; weight decay is computed in this file.
+
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        assert isinstance(use_locking, bool), f"For {self.cls_name}, use_locking should be bool"
+
+        self.beta1 = Tensor(beta1, mstype.float32)
+        self.beta2 = Tensor(beta2, mstype.float32)
+        self.beta3 = Tensor(beta3, mstype.float32)
+
+        self.eps = Tensor(eps, mstype.float32)
+        self.use_locking = use_locking
+        self.moment1 = self._parameters.clone(prefix="moment1", init="zeros")  # m
+        self.moment2 = self._parameters.clone(prefix="moment2", init="zeros")  # v
+        self.moment3 = self._parameters.clone(prefix="moment3", init="zeros")  # n
+        self.prev_gradient = self._parameters.clone(prefix="prev_gradient", init="zeros")
+
+        self.weight_decay = Tensor(weight_decay, mstype.float32)
+
+    def construct(self, gradients):
+        params = self._parameters
+        moment1 = self.moment1
+        moment2 = self.moment2
+        moment3 = self.moment3
+
+        gradients = self.flatten_gradients(gradients)
+        gradients = self.gradients_centralization(gradients)
+        gradients = self.scale_grad(gradients)
+        gradients = self._grad_sparse_indices_deduplicate(gradients)
+        lr = self.get_lr()
+
+        # TODO: currently not support dist
+        success = self.map_(
+            ops.partial(_adan_opt, self.beta1, self.beta2, self.beta3, self.eps, lr, self.weight_decay),
+            params,
+            moment1,
+            moment2,
+            moment3,
+            gradients,
+            self.prev_gradient,
+        )
+
+        return success
+
+    @Optimizer.target.setter
+    def target(self, value):
+        """
+        If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+        optimizer operation.
+        """
+        self._set_base_target(value)
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

mindcv.optim.adan.Adan.target(value)

If the input value is set to "CPU", the parameters will be updated on the host using the Fused optimizer operation.

+ +
+ Source code in mindcv/optim/adan.py +
@Optimizer.target.setter
+def target(self, value):
+    """
+    If the input value is set to "CPU", the parameters will be updated on the host using the Fused
+    optimizer operation.
+    """
+    self._set_base_target(value)
+
+
+
+ +
+ + + +
+ +
+ +

Lion

mindcv.optim.lion.Lion

Bases: Optimizer

Implementation of the Lion optimizer from the paper https://arxiv.org/abs/2302.06675. Additionally, this implementation supports gradient clipping.

+

Notes: the lr is usually 3-10x smaller than for adamw, and the weight decay is usually 3-10x larger than for adamw.
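
To make the note concrete, a hedged sketch: if an AdamW run used lr=1e-3 and weight_decay=0.05, a Lion run might start from roughly lr=1e-4 and weight_decay=0.5 (illustrative values, not tuned).

>>> import mindspore.nn as nn
>>> from mindcv.optim.lion import Lion
>>> net = nn.Dense(16, 4)  # toy network
>>> opt = Lion(net.trainable_params(), learning_rate=1e-4, weight_decay=0.5)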

+ +
+ Source code in mindcv/optim/lion.py +
class Lion(Optimizer):
+    """
+    Implementation of Lion optimizer from paper 'https://arxiv.org/abs/2302.06675'.
+    Additionally, this implementation is with gradient clipping.
+
+    Notes:
+    lr is usually 3-10x smaller than that of AdamW.
+    weight decay is usually 3-10x larger than that of AdamW.
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-4,
+        beta1=0.9,
+        beta2=0.99,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        clip=False,
+    ):
+        super().__init__(learning_rate, params, weight_decay)
+        _check_param_value(beta1, beta2, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="lion_m", init="zeros")
+        self.hyper_map = ops.HyperMap()
+        self.beta1_power = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+        self.reciprocal_scale = Tensor(1.0 / loss_scale, ms.float32)
+        self.clip = clip
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        gradients = scale_grad(gradients, self.reciprocal_scale)
+        if self.clip:
+            gradients = ops.clip_by_global_norm(gradients, 5.0, None)
+
+        beta1_power = self.beta1_power * self.beta1
+        self.beta1_power = beta1_power
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        if self.is_group:
+            if self.is_group_lr:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2),
+                    lr,
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+            else:
+                optim_result = self.hyper_map(
+                    ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr),
+                    self.weight_decay,
+                    self.parameters,
+                    self.moments1,
+                    gradients,
+                    self.decay_flags,
+                    self.optim_filter,
+                )
+        else:
+            optim_result = self.hyper_map(
+                ops.partial(_lion_opt, beta1_power, beta2_power, self.beta1, self.beta2, lr, self.weight_decay),
+                self.parameters,
+                self.moments1,
+                gradients,
+                self.decay_flags,
+                self.optim_filter,
+            )
+        if self.use_parallel:
+            self.broadcast_params(optim_result)
+        return optim_result
+
+
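A short usage sketch of the Lion class listed above, following its constructor signature; the network and hyperparameter values are placeholders, not recommendations.

```python
# Minimal usage sketch for the Lion optimizer defined above.
from mindspore import nn
from mindcv.optim.lion import Lion

net = nn.Dense(16, 4)  # placeholder network
# Per the note above: use a smaller lr and a larger weight decay than with AdamW.
opt = Lion(
    net.trainable_params(),
    learning_rate=1e-4,
    beta1=0.9,
    beta2=0.99,
    weight_decay=0.1,
    clip=True,  # enables ops.clip_by_global_norm(..., 5.0) in construct()
)
```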
+ + + +
+ + + + + + + + + + + +
+ +
+ +

NAdam

+ + +
+ + + +

+ mindcv.optim.nadam.NAdam + + +

+ + +
+

+ Bases: Optimizer

+ + +

Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).

+ +
+ Source code in mindcv/optim/nadam.py +
class NAdam(Optimizer):
+    """
+    Implements NAdam algorithm (a variant of Adam based on Nesterov momentum).
+    """
+
+    @opt_init_args_register
+    def __init__(
+        self,
+        params,
+        learning_rate=2e-3,
+        beta1=0.9,
+        beta2=0.999,
+        eps=1e-8,
+        weight_decay=0.0,
+        loss_scale=1.0,
+        schedule_decay=4e-3,
+    ):
+        super().__init__(learning_rate, params, weight_decay, loss_scale)
+        _check_param_value(beta1, beta2, eps, self.cls_name)
+        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
+        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
+        self.eps = Tensor(np.array([eps]).astype(np.float32))
+        self.moments1 = self.parameters.clone(prefix="nadam_m", init="zeros")
+        self.moments2 = self.parameters.clone(prefix="nadam_v", init="zeros")
+        self.schedule_decay = Tensor(np.array([schedule_decay]).astype(np.float32))
+        self.mu_schedule = Parameter(initializer(1, [1], ms.float32), name="mu_schedule")
+        self.beta2_power = Parameter(initializer(1, [1], ms.float32), name="beta2_power")
+
+    def construct(self, gradients):
+        lr = self.get_lr()
+        params = self.parameters
+        step = self.global_step + _scaler_one
+        gradients = self.decay_weight(gradients)
+        mu = self.beta1 * (
+            _scaler_one - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), step * self.schedule_decay)
+        )
+        mu_next = self.beta1 * (
+            _scaler_one
+            - Tensor(0.5, ms.float32) * ops.pow(Tensor(0.96, ms.float32), (step + _scaler_one) * self.schedule_decay)
+        )
+        mu_schedule = self.mu_schedule * mu
+        mu_schedule_next = self.mu_schedule * mu * mu_next
+        self.mu_schedule = mu_schedule
+        beta2_power = self.beta2_power * self.beta2
+        self.beta2_power = beta2_power
+
+        num_params = len(params)
+        for i in range(num_params):
+            ops.assign(self.moments1[i], self.beta1 * self.moments1[i] + (_scaler_one - self.beta1) * gradients[i])
+            ops.assign(
+                self.moments2[i], self.beta2 * self.moments2[i] + (_scaler_one - self.beta2) * ops.square(gradients[i])
+            )
+
+            regulate_m = mu_next * self.moments1[i] / (_scaler_one - mu_schedule_next) + (_scaler_one - mu) * gradients[
+                i
+            ] / (_scaler_one - mu_schedule)
+            regulate_v = self.moments2[i] / (_scaler_one - beta2_power)
+
+            update = params[i] - lr * regulate_m / (self.eps + ops.sqrt(regulate_v))
+            ops.assign(params[i], update)
+
+        return params
+
+
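The Nesterov momentum schedule inside `construct` above can be hard to read through the Tensor arithmetic. The following plain-Python helper mirrors that formula for a single step; it is an illustration only, not part of the library.

```python
import math

def nadam_mu(step: int, beta1: float = 0.9, schedule_decay: float = 4e-3) -> float:
    """Momentum coefficient used by NAdam at a given (1-based) step,
    mirroring mu = beta1 * (1 - 0.5 * 0.96 ** (step * schedule_decay)) from construct()."""
    return beta1 * (1.0 - 0.5 * math.pow(0.96, step * schedule_decay))

print(nadam_mu(1))        # ~0.4501, about half of beta1 at the start
print(nadam_mu(100_000))  # ~0.9, approaches beta1 as training progresses
```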
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/scheduler/index.html b/zh/reference/scheduler/index.html new file mode 100644 index 000000000..25beeded6 --- /dev/null +++ b/zh/reference/scheduler/index.html @@ -0,0 +1,2599 @@ + + + + + + + + + + + + + + + + + + + + + + + + scheduler - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Learning Rate Scheduler

+

Scheduler Factory

+ + + +
+ + + +

+mindcv.scheduler.scheduler_factory.create_scheduler(steps_per_epoch, scheduler='constant', lr=0.01, min_lr=1e-06, warmup_epochs=3, warmup_factor=0.0, decay_epochs=10, decay_rate=0.9, milestones=None, num_epochs=200, num_cycles=1, cycle_decay=1.0, lr_epoch_stair=False) + +

+ + +
+ +

Creates learning rate scheduler by name.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
steps_per_epoch +
+

number of steps per epoch.

+
+

+ + TYPE: + int + +

+
scheduler +
+

scheduler name like 'constant', 'cosine_decay', 'step_decay', +'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'constant' + +

+
lr +
+

learning rate value. Default: 0.01.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.01 + +

+
min_lr +
+

lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.

+
+

+ + TYPE: + float + + + DEFAULT: + 1e-06 + +

+
warmup_epochs +
+

epochs to warmup LR, if scheduler supports. Default: 3.

+
+

+ + TYPE: + int + + + DEFAULT: + 3 + +

+
warmup_factor +
+

the warmup phase of scheduler is a linearly increasing lr, +the beginning factor is warmup_factor, i.e., the lr of the first step/epoch is lr*warmup_factor, +and the ending lr in the warmup phase is lr. Default: 0.0

+
+

+ + TYPE: + float + + + DEFAULT: + 0.0 + +

+
decay_epochs +
+

for 'cosine_decay' schedulers, decay LR to min_lr in decay_epochs. +For 'step_decay' scheduler, decay LR by a factor of decay_rate every decay_epochs. Default: 10.

+
+

+ + TYPE: + int + + + DEFAULT: + 10 + +

+
decay_rate +
+

LR decay rate. Default: 0.9.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.9 + +

+
milestones +
+

list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None

+
+

+ + TYPE: + list + + + DEFAULT: + None + +

+
num_epochs +
+

Number of total epochs. Default: 200.

+
+

+ + TYPE: + int + + + DEFAULT: + 200 + +

+
num_cycles +
+

Number of cycles for cosine decay and cyclic. Default: 1.

+
+

+ + TYPE: + int + + + DEFAULT: + 1 + +

+
cycle_decay +
+

Decay rate of lr max in each cosine cycle. Default: 1.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 1.0 + +

+
lr_epoch_stair +
+

If True, LR will be updated in the beginning of each new epoch +and the LR will be consistent for each batch in one epoch. +Otherwise, learning rate will be updated dynamically in each step. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ +
+

Cell object for computing LR with input of current global steps

+
+
+ +
+ Source code in mindcv/scheduler/scheduler_factory.py +
def create_scheduler(
+    steps_per_epoch: int,
+    scheduler: str = "constant",
+    lr: float = 0.01,
+    min_lr: float = 1e-6,
+    warmup_epochs: int = 3,
+    warmup_factor: float = 0.0,
+    decay_epochs: int = 10,
+    decay_rate: float = 0.9,
+    milestones: list = None,
+    num_epochs: int = 200,
+    num_cycles: int = 1,
+    cycle_decay: float = 1.0,
+    lr_epoch_stair: bool = False,
+):
+    r"""Creates learning rate scheduler by name.
+
+    Args:
+        steps_per_epoch: number of steps per epoch.
+        scheduler: scheduler name like 'constant', 'cosine_decay', 'step_decay',
+            'exponential_decay', 'polynomial_decay', 'multi_step_decay'. Default: 'constant'.
+        lr: learning rate value. Default: 0.01.
+        min_lr: lower lr bound for 'cosine_decay' schedulers. Default: 1e-6.
+        warmup_epochs: epochs to warmup LR, if scheduler supports. Default: 3.
+        warmup_factor: the warmup phase of scheduler is a linearly increasing lr,
+            the beginning factor is `warmup_factor`, i.e., the lr of the first step/epoch is lr*warmup_factor,
+            and the ending lr in the warmup phase is lr. Default: 0.0
+        decay_epochs: for 'cosine_decay' schedulers, decay LR to min_lr in `decay_epochs`.
+            For 'step_decay' scheduler, decay LR by a factor of `decay_rate` every `decay_epochs`. Default: 10.
+        decay_rate: LR decay rate. Default: 0.9.
+        milestones: list of epoch milestones for 'multi_step_decay' scheduler. Must be increasing. Default: None
+        num_epochs: Number of total epochs. Default: 200.
+        num_cycles: Number of cycles for cosine decay and cyclic. Default: 1.
+        cycle_decay: Decay rate of lr max in each cosine cycle. Default: 1.0.
+        lr_epoch_stair: If True, LR will be updated in the beginning of each new epoch
+            and the LR will be consistent for each batch in one epoch.
+            Otherwise, learning rate will be updated dynamically in each step. Default: False.
+    Returns:
+        Cell object for computing LR with input of current global steps
+    """
+    # check params
+    if milestones is None:
+        milestones = []
+
+    if warmup_epochs + decay_epochs > num_epochs:
+        _logger.warning("warmup_epochs + decay_epochs > num_epochs. Please check and reduce decay_epochs!")
+
+    # lr warmup phase
+    warmup_lr_scheduler = []
+    if warmup_epochs > 0:
+        if warmup_factor == 0 and lr_epoch_stair:
+            _logger.warning(
+                "The warmup factor is set to 0, lr of 0-th epoch is always zero! " "Recommend value is 0.01."
+            )
+        warmup_func = linear_lr if lr_epoch_stair else linear_refined_lr
+        warmup_lr_scheduler = warmup_func(
+            start_factor=warmup_factor,
+            end_factor=1.0,
+            total_iters=warmup_epochs,
+            lr=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=warmup_epochs,
+        )
+
+    # lr decay phase
+    main_epochs = num_epochs - warmup_epochs
+    if scheduler in ["cosine_decay", "warmup_cosine_decay"]:
+        cosine_func = cosine_decay_lr if lr_epoch_stair else cosine_decay_refined_lr
+        main_lr_scheduler = cosine_func(
+            decay_epochs=decay_epochs,
+            eta_min=min_lr,
+            eta_max=lr,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+            num_cycles=num_cycles,
+            cycle_decay=cycle_decay,
+        )
+    elif scheduler == "one_cycle":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError(
+                "OneCycle scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0."
+            )
+        div_factor = 25.0
+        initial_lr = lr / div_factor
+        final_div_factor = initial_lr / min_lr
+        main_lr_scheduler = one_cycle_lr(
+            max_lr=lr,
+            final_div_factor=final_div_factor,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "cyclic":
+        if lr_epoch_stair or warmup_epochs > 0:
+            raise ValueError("Cyclic scheduler doesn't support learning rate varies with epoch and warmup_epochs > 0.")
+        num_steps = steps_per_epoch * main_epochs
+        step_size_up = int(num_steps / num_cycles / 2)
+        main_lr_scheduler = cyclic_lr(
+            base_lr=min_lr,
+            max_lr=lr,
+            step_size_up=step_size_up,
+            steps_per_epoch=steps_per_epoch,
+            epochs=main_epochs,
+        )
+    elif scheduler == "exponential_decay":
+        exponential_func = exponential_lr if lr_epoch_stair else exponential_refined_lr
+        main_lr_scheduler = exponential_func(
+            gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "polynomial_decay":
+        polynomial_func = polynomial_lr if lr_epoch_stair else polynomial_refined_lr
+        main_lr_scheduler = polynomial_func(
+            total_iters=main_epochs, power=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "step_decay":
+        main_lr_scheduler = step_lr(
+            step_size=decay_epochs, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "multi_step_decay":
+        main_lr_scheduler = multi_step_lr(
+            milestones=milestones, gamma=decay_rate, lr=lr, steps_per_epoch=steps_per_epoch, epochs=main_epochs
+        )
+    elif scheduler == "constant":
+        main_lr_scheduler = [lr for _ in range(steps_per_epoch * main_epochs)]
+    else:
+        raise ValueError(f"Invalid scheduler: {scheduler}")
+
+    # combine
+    lr_scheduler = warmup_lr_scheduler + main_lr_scheduler
+
+    return lr_scheduler
+
+
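A short usage sketch of the factory above; the values are illustrative only, and steps_per_epoch would normally come from the dataset size. For list-based schedulers the result is a flat list of per-step learning rates whose length follows directly from the listed implementation.

```python
# Illustrative call to the scheduler factory shown above.
from mindcv.scheduler.scheduler_factory import create_scheduler

steps_per_epoch = 100
num_epochs = 60
lr_list = create_scheduler(
    steps_per_epoch,
    scheduler="cosine_decay",
    lr=0.01,
    min_lr=1e-6,
    warmup_epochs=5,
    decay_epochs=55,  # warmup_epochs + decay_epochs <= num_epochs avoids the warning above
    num_epochs=num_epochs,
)
# warmup steps + main steps cover every step of training
assert len(lr_list) == steps_per_epoch * num_epochs
```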
+
+ +
+ +
+ + + +

+ mindcv.scheduler.dynamic_lr + + +

+ +
+ +

Meta learning rate scheduler.

+

This module implements the same learning rate schedulers as native PyTorch, see "torch.optim.lr_scheduler" <https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate>_. At present, only constant_lr, linear_lr, polynomial_lr, exponential_lr, step_lr, multi_step_lr, cosine_annealing_lr, cosine_annealing_warm_restarts_lr, one_cycle_lr, cyclic_lr are implemented. The number, names and usage of the positional arguments are exactly the same as those of native PyTorch.

+

However, because each scheduler must explicitly return the learning rate at every step, we introduce three additional keyword arguments, namely lr, steps_per_epoch and epochs: lr is the base learning rate that would be passed when creating the optimizer in torch; steps_per_epoch is the number of steps (iterations) per epoch; epochs is the number of epochs, which together with steps_per_epoch determines the length of the returned lr list.

+

Among all schedulers, one_cycle_lr and cyclic_lr need only the latter two keyword arguments, since the lr argument passed when creating the optimizer in torch has no effect for these two schedulers.

+

Most schedulers in PyTorch are coarse-grained, i.e. the learning rate is constant within a single epoch. For non-stepwise schedulers we therefore provide fine-grained variants in which the learning rate also changes within an epoch. The function names of these variants contain the refined keyword, e.g. linear_refined_lr, polynomial_refined_lr, etc.

+ + + +
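The coarse vs. refined distinction described above can be seen with the linear warmup helpers; the keyword names below are taken from the call inside create_scheduler earlier on this page, and the printed values are not asserted here.

```python
# Sketch of the epoch-wise vs. step-wise ("refined") behaviour described above.
from mindcv.scheduler.dynamic_lr import linear_lr, linear_refined_lr

kwargs = dict(start_factor=0.0, end_factor=1.0, total_iters=2,
              lr=1.0, steps_per_epoch=2, epochs=2)
print(linear_lr(**kwargs))          # epoch-wise: lr is constant within each epoch
print(linear_refined_lr(**kwargs))  # step-wise: lr also changes inside an epoch
```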
+ + + + + + + + + + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every epoch

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cosine_decay_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every epoch"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = math.floor(i / steps_per_epoch)
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
+
+
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0) + +

+ + +
+ +

update every step

+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cosine_decay_refined_lr(decay_epochs, eta_min, *, eta_max, steps_per_epoch, epochs, num_cycles=1, cycle_decay=1.0):
+    """update every step"""
+    tot_steps = steps_per_epoch * epochs
+    lrs = []
+
+    for c in range(num_cycles):
+        lr_max = eta_max * (cycle_decay**c)
+        delta = 0.5 * (lr_max - eta_min)
+        for i in range(steps_per_epoch * decay_epochs):
+            t_cur = i / steps_per_epoch
+            t_cur = min(t_cur, decay_epochs)
+            lr_cur = eta_min + delta * (1.0 + math.cos(math.pi * t_cur / decay_epochs))
+            if len(lrs) < tot_steps:
+                lrs.append(lr_cur)
+            else:
+                break
+
+    if epochs > num_cycles * decay_epochs:
+        for i in range((epochs - (num_cycles * decay_epochs)) * steps_per_epoch):
+            lrs.append(eta_min)
+
+    return lrs
+
+
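A tiny numeric illustration of the two cosine variants listed above; the values follow directly from the listed code and are shown only to make the epoch-wise vs. step-wise difference concrete.

```python
from mindcv.scheduler.dynamic_lr import cosine_decay_lr, cosine_decay_refined_lr

common = dict(decay_epochs=2, eta_min=0.0, eta_max=1.0, steps_per_epoch=2, epochs=3)
print(cosine_decay_lr(**common))
# [1.0, 1.0, 0.5, 0.5, 0.0, 0.0]        -> lr changes only at epoch boundaries
print(cosine_decay_refined_lr(**common))
# [1.0, ~0.854, 0.5, ~0.146, 0.0, 0.0]  -> lr also changes within an epoch
```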
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.cyclic_lr(base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', *, steps_per_epoch, epochs) + +

+ + +
+ +

Cyclic learning rate scheduler based on '"Cyclical Learning Rates for Training Neural Networks" https://arxiv.org/abs/1506.01186'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
base_lr +
+

Lower learning rate boundaries in each cycle.

+
+

+ + TYPE: + float + +

+
max_lr +
+

Upper learning rate boundaries in each cycle.

+
+

+ + TYPE: + float + +

+
step_size_up +
+

Number of steps in the increasing half in each cycle. Default: 2000.

+
+

+ + TYPE: + int + + + DEFAULT: + 2000 + +

+
step_size_down +
+

Number of steps in the decreasing half of each cycle. If step_size_down is None, it is set to step_size_up. Default: None.

+
+

+ + DEFAULT: + None + +

+
mode +
+

One of {'triangular', 'triangular2', 'exp_range'}. Ignored if scale_fn is not None. Default: 'triangular'.

+
+

+ + TYPE: + str + + + DEFAULT: + 'triangular' + +

+
gamma +
+

Constant used in the 'exp_range' scaling function: gamma**(cycle_iterations). Default: 1.0.

+
+

+ + DEFAULT: + 1.0 + +

+
scale_fn +
+

Custom scaling policy defined by a single argument lambda function. If it's +not None, 'mode' is ignored. Default: None

+
+

+ + DEFAULT: + None + +

+
scale_mode +
+

One of {'cycle', 'iterations'}. Determines whether scale_fn is evaluated on the cycle number or on cycle iterations. Default: 'cycle'.

+
+

+ + DEFAULT: + 'cycle' + +

+
steps_per_epoch +
+

Number of steps per epoch.

+
+

+ + TYPE: + int + +

+
epochs +
+

Number of total epochs.

+
+

+ + TYPE: + int + +

+
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def cyclic_lr(
+    base_lr: float,
+    max_lr: float,
+    step_size_up: int = 2000,
+    step_size_down=None,
+    mode: str = "triangular",
+    gamma=1.0,
+    scale_fn=None,
+    scale_mode="cycle",
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    Cyclic learning rate scheduler based on
+    '"Cyclical Learning Rates for Training Neural Networks" <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        base_lr: Lower learning rate boundaries in each cycle.
+        max_lr: Upper learning rate boundaries in each cycle.
+        step_size_up: Number of steps in the increasing half in each cycle. Default: 2000.
+        step_size_down: Number of steps in the decreasing half in each cycle. If step_size_down
+            is None, it's set to step_size_up. Default: None.
+        mode: One of {'triangular', 'triangular2', 'exp_range'}. Ignored if scale_fn is not None.
+            Default: 'triangular'.
+        gamma: Constant used in the 'exp_range' scaling function: gamma**(cycle_iterations).
+            Default: 1.0
+        scale_fn: Custom scaling policy defined by a single argument lambda function. If it's
+            not None, 'mode' is ignored. Default: None
+        scale_mode: One of {'cycle', 'iterations'}. Determines whether scale_fn is evaluated on the
+            cycle number or on cycle iterations. Default: 'cycle'
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _triangular_scale_fn(x):
+        return 1.0
+
+    def _triangular2_scale_fn(x):
+        return 1 / (2.0**(x - 1))
+
+    def _exp_range_scale_fn(x):
+        return gamma**x
+
+    steps = steps_per_epoch * epochs
+    step_size_up = float(step_size_up)
+    step_size_down = float(step_size_down) if step_size_down is not None else step_size_up
+    total_size = step_size_up + step_size_down
+    step_ratio = step_size_up / total_size
+    if scale_fn is None:
+        if mode == "triangular":
+            scale_fn = _triangular_scale_fn
+            scale_mode = "cycle"
+        elif mode == "triangular2":
+            scale_fn = _triangular2_scale_fn
+            scale_mode = "cycle"
+        elif mode == "exp_range":
+            scale_fn = _exp_range_scale_fn
+            scale_mode = "iterations"
+    lrs = []
+    for i in range(steps):
+        cycle = math.floor(1 + i / total_size)
+        x = 1.0 + i / total_size - cycle
+        if x <= step_ratio:
+            scale_factor = x / step_ratio
+        else:
+            scale_factor = (x - 1) / (step_ratio - 1)
+        base_height = (max_lr - base_lr) * scale_factor
+        if scale_mode == "cycle":
+            lrs.append(base_lr + base_height * scale_fn(cycle))
+        else:
+            lrs.append(base_lr + base_height * scale_fn(i))
+    return lrs
+
+
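A small triangular-mode illustration of cyclic_lr as listed above; the printed values follow from the listed code and show two full cycles of the triangular wave.

```python
from mindcv.scheduler.dynamic_lr import cyclic_lr

lrs = cyclic_lr(
    base_lr=0.0,
    max_lr=1.0,
    step_size_up=2,
    step_size_down=2,
    mode="triangular",
    steps_per_epoch=4,
    epochs=2,
)
print(lrs)
# [0.0, 0.5, 1.0, 0.5, 0.0, 0.5, 1.0, 0.5]  -> two full triangular cycles
```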
+
+ +
+ + +
+ + + +

+mindcv.scheduler.dynamic_lr.one_cycle_lr(max_lr, pct_start=0.3, anneal_strategy='cos', div_factor=25.0, final_div_factor=10000.0, three_phase=False, *, steps_per_epoch, epochs) + +

+ + +
+ +

OneCycle learning rate scheduler based on +'"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates" +https://arxiv.org/abs/1708.07120'

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
max_lr +
+

Upper learning rate boundaries in the cycle.

+
+

+ + TYPE: + float + +

+
pct_start +
+

The percentage of the number of steps of increasing learning rate +in the cycle. Default: 0.3.

+
+

+ + TYPE: + float + + + DEFAULT: + 0.3 + +

+
anneal_strategy +
+

Define the annealing strategy: "cos" for cosine annealing, +"linear" for linear annealing. Default: "cos".

+
+

+ + TYPE: + str + + + DEFAULT: + 'cos' + +

+
div_factor +
+

Initial learning rate via initial_lr = max_lr / div_factor. +Default: 25.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 25.0 + +

+
final_div_factor +
+

Minimum learning rate at the end via +min_lr = initial_lr / final_div_factor. Default: 10000.0.

+
+

+ + TYPE: + float + + + DEFAULT: + 10000.0 + +

+
three_phase +
+

If True, the learning rate is updated in three phases according to final_div_factor; otherwise, it is updated in two phases. Default: False.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
steps_per_epoch +
+

Number of steps per epoch.

+
+

+ + TYPE: + int + +

+
epochs +
+

Number of total epochs.

+
+

+ + TYPE: + int + +

+
+ +
+ Source code in mindcv/scheduler/dynamic_lr.py +
def one_cycle_lr(
+    max_lr: float,
+    pct_start: float = 0.3,
+    anneal_strategy: str = "cos",
+    div_factor: float = 25.0,
+    final_div_factor: float = 10000.0,
+    three_phase: bool = False,
+    *,
+    steps_per_epoch: int,
+    epochs: int,
+):
+    """
+    OneCycle learning rate scheduler based on
+    '"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates"
+    <https://arxiv.org/abs/1708.07120>'
+
+    Args:
+        max_lr: Upper learning rate boundaries in the cycle.
+        pct_start: The percentage of the number of steps of increasing learning rate
+            in the cycle. Default: 0.3.
+        anneal_strategy: Define the annealing strategy: "cos" for cosine annealing,
+            "linear" for linear annealing. Default: "cos".
+        div_factor: Initial learning rate via initial_lr = max_lr / div_factor.
+            Default: 25.0.
+        final_div_factor: Minimum learning rate at the end via
+            min_lr = initial_lr / final_div_factor. Default: 10000.0.
+        three_phase: If True, learning rate will be updated by three-phase according to
+            "final_div_factor". Otherwise, learning rate will be updated by two-phase.
+            Default: False.
+        steps_per_epoch: Number of steps per epoch.
+        epochs: Number of total epochs.
+    """
+
+    def _annealing_cos(start, end, pct):
+        cos_out = math.cos(math.pi * pct) + 1
+        return end + (start - end) / 2.0 * cos_out
+
+    def _annealing_linear(start, end, pct):
+        return (end - start) * pct + start
+
+    initial_lr = max_lr / div_factor
+    min_lr = initial_lr / final_div_factor
+    steps = steps_per_epoch * epochs
+    step_size_up = float(pct_start * steps) - 1
+    step_size_down = float(2 * pct_start * steps) - 2
+    step_size_end = float(steps) - 1
+    if anneal_strategy == "cos":
+        anneal_func = _annealing_cos
+    elif anneal_strategy == "linear":
+        anneal_func = _annealing_linear
+    else:
+        raise ValueError(f"anneal_strategy must be one of 'cos' or 'linear', but got {anneal_strategy}")
+    lrs = []
+    for i in range(steps):
+        if three_phase:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            elif step_size_up < i <= step_size_down:
+                lrs.append(anneal_func(max_lr, initial_lr, (i - step_size_up) / (step_size_down - step_size_up)))
+            else:
+                lrs.append(anneal_func(initial_lr, min_lr, (i - step_size_down) / (step_size_end - step_size_down)))
+        else:
+            if i <= step_size_up:
+                lrs.append(anneal_func(initial_lr, max_lr, i / step_size_up))
+            else:
+                lrs.append(anneal_func(max_lr, min_lr, (i - step_size_up) / (step_size_end - step_size_up)))
+    return lrs
+
+
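An endpoint sanity check for one_cycle_lr as listed above: the first value equals max_lr / div_factor, the peak equals max_lr, and the last value equals initial_lr / final_div_factor, exactly as the docstring describes. Values here are illustrative.

```python
import math
from mindcv.scheduler.dynamic_lr import one_cycle_lr

max_lr, div_factor, final_div_factor = 1.0, 25.0, 1e4
lrs = one_cycle_lr(
    max_lr=max_lr,
    div_factor=div_factor,
    final_div_factor=final_div_factor,
    steps_per_epoch=10,
    epochs=1,
)
initial_lr = max_lr / div_factor        # 0.04, the very first lr
min_lr = initial_lr / final_div_factor  # 4e-6, the very last lr
assert math.isclose(lrs[0], initial_lr)
assert math.isclose(max(lrs), max_lr)
assert math.isclose(lrs[-1], min_lr)
```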
+
+ +
+ + + +
+ +
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/reference/utils/index.html b/zh/reference/utils/index.html new file mode 100644 index 000000000..036ec3636 --- /dev/null +++ b/zh/reference/utils/index.html @@ -0,0 +1,3282 @@ + + + + + + + + + + + + + + + + + + + + + + + + utils - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + 跳转至 + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + +

Utility

+

Logger

+ + + +
+ + + +

+mindcv.utils.logger.set_logger(name=None, output_dir=None, rank=0, log_level=logging.INFO, color=True) + +

+ + +
+ +

Initialize the logger.

+

If the logger has not been initialized, this method initializes it by adding one or two handlers; otherwise the already-initialized logger is returned directly. During initialization, a console handler is added only to the logger of the master process. If output_dir is specified, a file handler is added to the loggers of all processes.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETER DESCRIPTION
name +
+

Logger name. Defaults to None to set up root logger.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
output_dir +
+

The directory to save log.

+
+

+ + TYPE: + Optional[str] + + + DEFAULT: + None + +

+
rank +
+

Process rank in the distributed training. Defaults to 0.

+
+

+ + TYPE: + int + + + DEFAULT: + 0 + +

+
log_level +
+

Verbosity level of the logger. Defaults to logging.INFO.

+
+

+ + TYPE: + int + + + DEFAULT: + logging.INFO + +

+
color +
+

If True, color the output. Defaults to True.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + logging.Logger + + +
+

logging.Logger: An initialized logger.

+
+
+ +
+ Source code in mindcv/utils/logger.py +
def set_logger(
+    name: Optional[str] = None,
+    output_dir: Optional[str] = None,
+    rank: int = 0,
+    log_level: int = logging.INFO,
+    color: bool = True,
+) -> logging.Logger:
+    """Initialize the logger.
+
+    If the logger has not been initialized, this method will initialize the
+    logger by adding one or two handlers, otherwise the initialized logger will
+    be directly returned. During initialization, a console handler is added only to the
+    logger of the master process. If ``output_dir`` is specified, a file handler is added
+    to the loggers of all processes.
+
+    Args:
+        name: Logger name. Defaults to None to set up root logger.
+        output_dir: The directory to save log.
+        rank: Process rank in the distributed training. Defaults to 0.
+        log_level: Verbosity level of the logger. Defaults to ``logging.INFO``.
+        color: If True, color the output. Defaults to True.
+
+    Returns:
+        logging.Logger: An initialized logger.
+    """
+    if name in logger_initialized:
+        return logger_initialized[name]
+
+    # get root logger if name is None
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level)
+    # the messages of this logger will not be propagated to its parent
+    logger.propagate = False
+
+    fmt = "%(asctime)s %(name)s %(levelname)s - %(message)s"
+    datefmt = "[%Y-%m-%d %H:%M:%S]"
+
+    # create console handler for master process
+    if rank == 0:
+        if color:
+            if has_rich:
+                console_handler = RichHandler(level=log_level, log_time_format=datefmt)
+            elif has_termcolor:
+                console_handler = logging.StreamHandler(stream=sys.stdout)
+                console_handler.setLevel(log_level)
+                console_handler.setFormatter(_ColorfulFormatter(fmt=fmt, datefmt=datefmt))
+            else:
+                raise NotImplementedError("If you want color, 'rich' or 'termcolor' has to be installed!")
+        else:
+            console_handler = logging.StreamHandler(stream=sys.stdout)
+            console_handler.setLevel(log_level)
+            console_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(console_handler)
+
+    if output_dir is not None:
+        os.makedirs(output_dir, exist_ok=True)
+        file_handler = logging.FileHandler(os.path.join(output_dir, f"rank{rank}.log"))
+        file_handler.setLevel(log_level)
+        file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
+        logger.addHandler(file_handler)
+
+    logger_initialized[name] = logger
+    return logger
+
+
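A basic usage sketch of set_logger as defined above; the name and output directory are placeholders. Passing color=False avoids the optional rich/termcolor dependency.

```python
import logging
from mindcv.utils.logger import set_logger

logger = set_logger(name="mindcv", output_dir="./logs", rank=0,
                    log_level=logging.INFO, color=False)
logger.info("Writes to the console on rank 0 and to ./logs/rank0.log")

# Calling it again with the same name returns the cached logger instead of
# attaching duplicate handlers (see the logger_initialized check above).
assert set_logger(name="mindcv") is logger
```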
+
+ +

Callbacks

+ + +
+ + + +

+ mindcv.utils.callbacks.StateMonitor + + +

+ + +
+

+ Bases: Callback

+ + +

Monitors the training loss and validation accuracy, and after each epoch saves the checkpoint with the highest validation accuracy as the best checkpoint.

+ +
+ Source code in mindcv/utils/callbacks.py +
class StateMonitor(Callback):
+    """
+    Train loss and validation accuracy monitor, after each epoch save the
+    best checkpoint file with the highest validation accuracy.
+    """
+
+    def __init__(
+        self,
+        model,
+        model_name="",
+        model_ema=False,
+        last_epoch=0,
+        dataset_sink_mode=True,
+        dataset_val=None,
+        metric_name=("accuracy",),
+        val_interval=1,
+        val_start_epoch=1,
+        save_best_ckpt=True,
+        ckpt_save_dir="./",
+        ckpt_save_interval=1,
+        ckpt_save_policy=None,
+        ckpt_keep_max=10,
+        summary_dir="./",
+        log_interval=100,
+        rank_id=None,
+        device_num=None,
+    ):
+        super().__init__()
+        # model
+        self.model = model
+        self.model_name = model_name
+        self.model_ema = model_ema
+        self.last_epoch = last_epoch
+        self.dataset_sink_mode = dataset_sink_mode
+        # evaluation
+        self.dataset_val = dataset_val
+        self.metric_name = metric_name
+        self.val_interval = val_interval
+        self.val_start_epoch = val_start_epoch
+        # logging
+        self.best_res = 0
+        self.best_epoch = -1
+        self.save_best_ckpt = save_best_ckpt
+        self.ckpt_save_dir = ckpt_save_dir
+        self.ckpt_save_interval = ckpt_save_interval
+        self.ckpt_save_policy = ckpt_save_policy
+        self.ckpt_keep_max = ckpt_keep_max
+        self.ckpt_manager = CheckpointManager(ckpt_save_policy=self.ckpt_save_policy)
+        self._need_flush_from_cache = True
+        self.summary_dir = summary_dir
+        self.log_interval = log_interval
+        # system
+        self.rank_id = rank_id if rank_id is not None else 0
+        self.device_num = device_num if rank_id is not None else 1
+        if self.rank_id in [0, None]:
+            os.makedirs(ckpt_save_dir, exist_ok=True)
+            self.log_file = os.path.join(ckpt_save_dir, "result.log")
+            log_line = "".join(
+                f"{s:<20}" for s in ["Epoch", "TrainLoss", *metric_name, "TrainTime", "EvalTime", "TotalTime"]
+            )
+            with open(self.log_file, "w", encoding="utf-8") as fp:  # writing the title of result.log
+                fp.write(log_line + "\n")
+        if self.device_num > 1:
+            self.all_reduce = AllReduceSum()
+        # timestamp
+        self.step_ts = None
+        self.epoch_ts = None
+        self.step_time_accum = 0
+        # model_ema
+        if self.model_ema:
+            self.hyper_map = ops.HyperMap()
+            self.online_params = ParameterTuple(self.model.train_network.get_parameters())
+            self.swap_params = self.online_params.clone("swap", "zeros")
+
+    def __enter__(self):
+        self.summary_record = SummaryRecord(self.summary_dir)
+        return self
+
+    def __exit__(self, *exc_args):
+        self.summary_record.close()
+
+    def apply_eval(self, run_context):
+        """Model evaluation, return validation accuracy."""
+        if self.model_ema:
+            cb_params = run_context.original_args()
+            self.hyper_map(ops.assign, self.swap_params, self.online_params)
+            ema_dict = dict()
+            net = self._get_network_from_cbp(cb_params)
+            for param in net.get_parameters():
+                if param.name.startswith("ema"):
+                    new_name = param.name.split("ema.")[1]
+                    ema_dict[new_name] = param.data
+            load_param_into_net(self.model.train_network.network, ema_dict)
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+            self.hyper_map(ops.assign, self.online_params, self.swap_params)
+        else:
+            res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+        if self.device_num > 1:
+            res_array = self.all_reduce(res_array)
+            res_array /= self.device_num
+        res_array = res_array.asnumpy()
+        return res_array
+
+    def on_train_step_begin(self, run_context):
+        self.step_ts = time()
+
+    def on_train_epoch_begin(self, run_context):
+        self.epoch_ts = time()
+
+    def on_train_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        # num_steps = num_batches * num_epochs
+        # cur_x start from 1, end at num_xs, range: [1, num_xs]
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        self.step_time_accum += time() - self.step_ts
+        if cur_batch % self.log_interval == 0 or cur_batch == num_batches or cur_batch == 1:
+            lr = self._get_lr_from_cbp(cb_params)
+            loss = self._get_loss_from_cbp(cb_params)
+            _logger.info(
+                f"Epoch: [{cur_epoch}/{num_epochs}], "
+                f"batch: [{cur_batch}/{num_batches}], "
+                f"loss: {loss.asnumpy():.6f}, "
+                f"lr: {lr.asnumpy():.6f}, "
+                f"time: {self.step_time_accum:.6f}s"
+            )
+            self.step_time_accum = 0
+
+    def on_train_epoch_end(self, run_context):
+        """
+        After epoch, print train loss and val accuracy,
+        save the best ckpt file with the highest validation accuracy.
+        """
+        cb_params = run_context.original_args()
+        num_epochs = cb_params.epoch_num
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+        cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+        cur_batch = (cur_step - 1) % num_batches + 1
+
+        train_time = time() - self.epoch_ts
+        loss = self._get_loss_from_cbp(cb_params)
+
+        val_time = 0
+        res = np.zeros(len(self.metric_name), dtype=np.float32)
+        # val while training if validation loader is not None
+        if (
+            self.dataset_val is not None
+            and cur_epoch >= self.val_start_epoch
+            and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+        ):
+            val_time = time()
+            res = self.apply_eval(run_context)
+            val_time = time() - val_time
+            # record val acc
+            metric_str = "Validation "
+            for i in range(len(self.metric_name)):
+                metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+            metric_str += f"time: {val_time:.6f}s"
+            _logger.info(metric_str)
+            # save the best ckpt file
+            if res[0] > self.best_res:
+                self.best_res = res[0]
+                self.best_epoch = cur_epoch
+                _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+        # save checkpoint
+        if self.rank_id in [0, None]:
+            if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+                best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+                save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+            if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+                if self._need_flush_from_cache:
+                    self._flush_from_cache(cb_params)
+                # save optim for resume
+                optimizer = self._get_optimizer_from_cbp(cb_params)
+                optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+                save_checkpoint(optimizer, optim_save_path, async_save=True)
+                # keep checkpoint files number equal max number.
+                ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+                _logger.info(f"Saving model to {ckpt_save_path}")
+                self.ckpt_manager.save_ckpoint(
+                    cb_params.train_network,
+                    num_ckpt=self.ckpt_keep_max,
+                    metric=res[0],
+                    save_path=ckpt_save_path,
+                )
+
+        # logging
+        total_time = time() - self.epoch_ts
+        _logger.info(
+            f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+            f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+        )
+        _logger.info("-" * 80)
+        if self.rank_id in [0, None]:
+            log_line = "".join(
+                f"{s:<20}"
+                for s in [
+                    f"{cur_epoch}",
+                    f"{loss.asnumpy():.6f}",
+                    *[f"{i:.4%}" for i in res],
+                    f"{train_time:.2f}",
+                    f"{val_time:.2f}",
+                    f"{total_time:.2f}",
+                ]
+            )
+            with open(self.log_file, "a", encoding="utf-8") as fp:
+                fp.write(log_line + "\n")
+
+        # summary
+        self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+        for i in range(len(res)):
+            self.summary_record.add_value(
+                "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+            )
+        self.summary_record.record(cur_step)
+
+    def on_train_end(self, run_context):
+        _logger.info("Finish training!")
+        if self.dataset_val is not None:
+            _logger.info(
+                f"The best validation {self.metric_name[0]} is: {self.best_res:.4%} at epoch {self.best_epoch}."
+            )
+        _logger.info("=" * 80)
+
+    def _get_network_from_cbp(self, cb_params):
+        if self.dataset_sink_mode:
+            network = cb_params.train_network.network
+        else:
+            network = cb_params.train_network
+        return network
+
+    def _get_optimizer_from_cbp(self, cb_params):
+        if cb_params.optimizer is not None:
+            optimizer = cb_params.optimizer
+        elif self.dataset_sink_mode:
+            optimizer = cb_params.train_network.network.optimizer
+        else:
+            optimizer = cb_params.train_network.optimizer
+        return optimizer
+
+    def _get_lr_from_cbp(self, cb_params):
+        optimizer = self._get_optimizer_from_cbp(cb_params)
+        if optimizer.global_step < 1:
+            _logger.warning(
+                "`global_step` of optimizer is less than 1. It seems to be a overflow at the first step. "
+                "If you keep seeing this message, it means that the optimizer never actually called."
+            )
+            optim_step = Tensor((0,), ms.int32)
+        else:  # if the optimizer is successfully called, the global_step will actually be the value of next step.
+            optim_step = optimizer.global_step - 1
+        if optimizer.dynamic_lr:
+            lr = optimizer.learning_rate(optim_step)[0]
+        else:
+            lr = optimizer.learning_rate
+        return lr
+
+    def _get_loss_from_cbp(self, cb_params):
+        """
+        Get loss from the network output.
+        Args:
+            cb_params (_InternalCallbackParam): Callback parameters.
+        Returns:
+            Union[Tensor, None], if parse loss success, will return a Tensor value(shape is [1]), else return None.
+        """
+        output = cb_params.net_outputs
+        if output is None:
+            _logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
+            return None
+
+        if isinstance(output, (int, float, Tensor)):
+            loss = output
+        elif isinstance(output, (list, tuple)) and output:
+            # If the output is a list, since the default network returns loss first,
+            # we assume that the first one is loss.
+            loss = output[0]
+        else:
+            _logger.warning(
+                "The output type could not be identified, expect type is one of "
+                "[int, float, Tensor, list, tuple], so no loss was recorded in SummaryCollector."
+            )
+            return None
+
+        if not isinstance(loss, Tensor):
+            loss = Tensor(loss)
+
+        loss = Tensor(np.mean(loss.asnumpy()))
+        return loss
+
+    def _flush_from_cache(self, cb_params):
+        """Flush cache data to host if tensor is cache enable."""
+        has_cache_params = False
+        params = cb_params.train_network.get_parameters()
+        for param in params:
+            if param.cache_enable:
+                has_cache_params = True
+                Tensor(param).flush_from_cache()
+        if not has_cache_params:
+            self._need_flush_from_cache = False
+
+
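Below is a hedged sketch of wiring StateMonitor into MindSpore training. It is not runnable as-is: model (a mindspore.train.Model), train_dataset, val_dataset and num_epochs are placeholders for objects created elsewhere, e.g. via mindcv's model and data factories.

```python
# Sketch only: `model`, `train_dataset`, `val_dataset`, `num_epochs` are placeholders.
from mindcv.utils.callbacks import StateMonitor

state_cb = StateMonitor(
    model,                    # ms.train.Model wrapping the network
    model_name="resnet50",
    dataset_val=val_dataset,  # enables in-training validation and best-ckpt saving
    metric_name=("accuracy",),
    val_interval=1,
    ckpt_save_dir="./ckpt",
    log_interval=100,
)
model.train(num_epochs, train_dataset, callbacks=[state_cb], dataset_sink_mode=True)
```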
+ + + +
+ + + + + + + + + + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.apply_eval(run_context) + +

+ + +
+ +

Model evaluation, return validation accuracy.

+ +
+ Source code in mindcv/utils/callbacks.py +
def apply_eval(self, run_context):
+    """Model evaluation, return validation accuracy."""
+    if self.model_ema:
+        cb_params = run_context.original_args()
+        self.hyper_map(ops.assign, self.swap_params, self.online_params)
+        ema_dict = dict()
+        net = self._get_network_from_cbp(cb_params)
+        for param in net.get_parameters():
+            if param.name.startswith("ema"):
+                new_name = param.name.split("ema.")[1]
+                ema_dict[new_name] = param.data
+        load_param_into_net(self.model.train_network.network, ema_dict)
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+        self.hyper_map(ops.assign, self.online_params, self.swap_params)
+    else:
+        res_dict = self.model.eval(self.dataset_val, dataset_sink_mode=False)
+    res_array = ms.Tensor(list(res_dict.values()), ms.float32)
+    if self.device_num > 1:
+        res_array = self.all_reduce(res_array)
+        res_array /= self.device_num
+    res_array = res_array.asnumpy()
+    return res_array
+
+
+
+ +
+ + +
+ + + +

+mindcv.utils.callbacks.StateMonitor.on_train_epoch_end(run_context) + +

+ + +
+ +

After each epoch, print the train loss and validation accuracy, and save the best checkpoint file (the one with the highest validation accuracy).

+ +
+ Source code in mindcv/utils/callbacks.py +
def on_train_epoch_end(self, run_context):
+    """
+    After epoch, print train loss and val accuracy,
+    save the best ckpt file with the highest validation accuracy.
+    """
+    cb_params = run_context.original_args()
+    num_epochs = cb_params.epoch_num
+    num_batches = cb_params.batch_num
+    cur_step = cb_params.cur_step_num + self.last_epoch * num_batches
+    cur_epoch = cb_params.cur_epoch_num + self.last_epoch
+    cur_batch = (cur_step - 1) % num_batches + 1
+
+    train_time = time() - self.epoch_ts
+    loss = self._get_loss_from_cbp(cb_params)
+
+    val_time = 0
+    res = np.zeros(len(self.metric_name), dtype=np.float32)
+    # val while training if validation loader is not None
+    if (
+        self.dataset_val is not None
+        and cur_epoch >= self.val_start_epoch
+        and (cur_epoch - self.val_start_epoch) % self.val_interval == 0
+    ):
+        val_time = time()
+        res = self.apply_eval(run_context)
+        val_time = time() - val_time
+        # record val acc
+        metric_str = "Validation "
+        for i in range(len(self.metric_name)):
+            metric_str += f"{self.metric_name[i]}: {res[i]:.4%}, "
+        metric_str += f"time: {val_time:.6f}s"
+        _logger.info(metric_str)
+        # save the best ckpt file
+        if res[0] > self.best_res:
+            self.best_res = res[0]
+            self.best_epoch = cur_epoch
+            _logger.info(f"=> New best val acc: {res[0]:.4%}")
+
+    # save checkpoint
+    if self.rank_id in [0, None]:
+        if self.save_best_ckpt and self.best_epoch == cur_epoch:  # always save ckpt if cur epoch got best acc
+            best_ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}_best.ckpt")
+            save_checkpoint(cb_params.train_network, best_ckpt_save_path, async_save=True)
+        if (cur_epoch % self.ckpt_save_interval == 0) or (cur_epoch == num_epochs):
+            if self._need_flush_from_cache:
+                self._flush_from_cache(cb_params)
+            # save optim for resume
+            optimizer = self._get_optimizer_from_cbp(cb_params)
+            optim_save_path = os.path.join(self.ckpt_save_dir, f"optim_{self.model_name}.ckpt")
+            save_checkpoint(optimizer, optim_save_path, async_save=True)
+            # keep checkpoint files number equal max number.
+            ckpt_save_path = os.path.join(self.ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{cur_batch}.ckpt")
+            _logger.info(f"Saving model to {ckpt_save_path}")
+            self.ckpt_manager.save_ckpoint(
+                cb_params.train_network,
+                num_ckpt=self.ckpt_keep_max,
+                metric=res[0],
+                save_path=ckpt_save_path,
+            )
+
+    # logging
+    total_time = time() - self.epoch_ts
+    _logger.info(
+        f"Total time since last epoch: {total_time:.6f}(train: {train_time:.6f}, val: {val_time:.6f})s, "
+        f"ETA: {(num_epochs - cur_epoch) * total_time:.6f}s"
+    )
+    _logger.info("-" * 80)
+    if self.rank_id in [0, None]:
+        log_line = "".join(
+            f"{s:<20}"
+            for s in [
+                f"{cur_epoch}",
+                f"{loss.asnumpy():.6f}",
+                *[f"{i:.4%}" for i in res],
+                f"{train_time:.2f}",
+                f"{val_time:.2f}",
+                f"{total_time:.2f}",
+            ]
+        )
+        with open(self.log_file, "a", encoding="utf-8") as fp:
+            fp.write(log_line + "\n")
+
+    # summary
+    self.summary_record.add_value("scalar", f"train_loss_{self.rank_id}", loss)
+    for i in range(len(res)):
+        self.summary_record.add_value(
+            "scalar", f"val_{self.metric_name[i]}_{self.rank_id}", Tensor(res[i], dtype=ms.float32)
+        )
+    self.summary_record.record(cur_step)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ mindcv.utils.callbacks.ValCallback + + +

+ + +
+

+ Bases: Callback

+ + +
+ Source code in mindcv/utils/callbacks.py +
class ValCallback(Callback):
+    def __init__(self, log_interval=100):
+        super().__init__()
+        self.log_interval = log_interval
+        self.ts = time()
+
+    def on_eval_step_end(self, run_context):
+        cb_params = run_context.original_args()
+        num_batches = cb_params.batch_num
+        cur_step = cb_params.cur_step_num
+
+        if cur_step % self.log_interval == 0 or cur_step == num_batches:
+            print(f"batch: {cur_step}/{num_batches}, time: {time() - self.ts:.6f}s")
+            self.ts = time()
+
+
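A minimal usage sketch (hedged: model is assumed to be an existing mindspore.Model and loader_val an evaluation dataset built with MindCV's data factories; both names are illustrative):

# Sketch only: `model` and `loader_val` are assumed to already exist.
from mindcv.utils.callbacks import ValCallback

metrics = model.eval(
    loader_val,
    callbacks=[ValCallback(log_interval=50)],  # print progress every 50 eval batches
    dataset_sink_mode=False,
)
print(metrics)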

Train Step

+ + +
+ + + +

+ mindcv.utils.train_step.TrainStep

+ + +
+

+ Bases: nn.TrainOneStepWithLossScaleCell

+ + +

Training step with loss scale.

+ +
+ The customized TrainOneStepCell also supports the following algorithms:
  • Exponential Moving Average (EMA)
  • Gradient Clipping
  • Gradient Accumulation
+
+
+ Source code in mindcv/utils/train_step.py +
class TrainStep(nn.TrainOneStepWithLossScaleCell):
+    """Training step with loss scale.
+
+    The customized TrainOneStepCell also supports the following algorithms:
+        * Exponential Moving Average (EMA)
+        * Gradient Clipping
+        * Gradient Accumulation
+    """
+
+    def __init__(
+        self,
+        network,
+        optimizer,
+        scale_sense=1.0,
+        ema=False,
+        ema_decay=0.9999,
+        clip_grad=False,
+        clip_value=15.0,
+        gradient_accumulation_steps=1,
+    ):
+        super(TrainStep, self).__init__(network, optimizer, scale_sense)
+        self.ema = ema
+        self.ema_decay = ema_decay
+        self.updates = Parameter(Tensor(0.0, ms.float32))
+        self.clip_grad = clip_grad
+        self.clip_value = clip_value
+        if self.ema:
+            self.weights_all = ms.ParameterTuple(list(network.get_parameters()))
+            self.ema_weight = self.weights_all.clone("ema", init="same")
+
+        self.accumulate_grad = gradient_accumulation_steps > 1
+        if self.accumulate_grad:
+            self.gradient_accumulation = GradientAccumulation(gradient_accumulation_steps, optimizer, self.grad_reducer)
+
+    def ema_update(self):
+        self.updates += 1
+        # ema factor is corrected by (1 - exp(-t/T)), where `t` means time and `T` means temperature.
+        ema_decay = self.ema_decay * (1 - F.exp(-self.updates / 2000))
+        # update trainable parameters
+        success = self.hyper_map(F.partial(_ema_op, ema_decay), self.ema_weight, self.weights_all)
+        return success
+
+    def construct(self, *inputs):
+        weights = self.weights
+        loss = self.network(*inputs)
+        scaling_sens = self.scale_sense
+
+        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)
+
+        scaling_sens_filled = ops.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
+        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
+        grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
+
+        # todo: When to clip grad? Do we need to clip grad after grad reduction? What if grad accumulation is needed?
+        if self.clip_grad:
+            grads = ops.clip_by_global_norm(grads, clip_norm=self.clip_value)
+
+        if self.loss_scaling_manager:  # scale_sense = update_cell: Cell --> TrainOneStepWithLossScaleCell.construct
+            if self.accumulate_grad:
+                # todo: GradientAccumulation only call grad_reducer at the step where the accumulation is completed.
+                #  So checking the overflow status is after gradient reduction, is this correct?
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = self.gradient_accumulation(loss, grads)
+            else:
+                # apply grad reducer on grads
+                grads = self.grad_reducer(grads)
+                # get the overflow buffer
+                cond = self.get_overflow_status(status, grads)
+                overflow = self.process_loss_scale(cond)
+                # if there is no overflow, do optimize
+                if not overflow:
+                    loss = F.depend(loss, self.optimizer(grads))
+        else:  # scale_sense = loss_scale: Tensor --> TrainOneStepCell.construct
+            if self.accumulate_grad:
+                loss = self.gradient_accumulation(loss, grads)
+            else:
+                grads = self.grad_reducer(grads)
+                loss = F.depend(loss, self.optimizer(grads))
+
+        if self.ema:
+            loss = F.depend(loss, self.ema_update())
+
+        return loss
+
+
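A minimal construction sketch. MindCV normally builds this cell for you inside create_trainer; net_with_loss, optimizer, images and labels below are assumed to already exist and are illustrative only:

import mindspore as ms
from mindspore import Tensor
from mindcv.utils.train_step import TrainStep

# Sketch only: net_with_loss wraps network + loss (e.g. nn.WithLossCell) and
# optimizer is a MindSpore optimizer; both are assumed to exist.
train_step = TrainStep(
    network=net_with_loss,
    optimizer=optimizer,
    scale_sense=Tensor(1024.0, ms.float32),  # fixed loss scale without an update cell
    ema=True,
    ema_decay=0.9999,
    clip_grad=True,
    clip_value=15.0,
    gradient_accumulation_steps=1,
).set_train()

loss = train_step(images, labels)  # one forward/backward/update step; inputs are illustrative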

Trainer Factory

+ + + +
+ + + +

+mindcv.utils.trainer_factory.create_trainer(network, loss, optimizer, metrics, amp_level, amp_cast_list, loss_scale_type, loss_scale=1.0, drop_overflow_update=False, ema=False, ema_decay=0.9999, clip_grad=False, clip_value=15.0, gradient_accumulation_steps=1) + +

+ + +
+ +

Create Trainer.

PARAMETERS
    network (nn.Cell): The backbone network to train, evaluate or predict.
    loss (nn.Cell): The loss function.
    optimizer (nn.Cell): The optimizer for training.
    metrics (Union[dict, set]): The metrics for model evaluation.
    amp_level (str): The level of auto mixed precision (AMP) training.
    amp_cast_list (str): Custom list of cells to cast to FP16 at the cell level.
    loss_scale_type (str): The type of loss scale.
    loss_scale (float, DEFAULT: 1.0): The value of the loss scale.
    drop_overflow_update (bool, DEFAULT: False): Whether to drop the parameter update when a gradient overflow occurs.
    ema (bool, DEFAULT: False): Whether to keep an exponential moving average (EMA) of the model weights.
    ema_decay (float, DEFAULT: 0.9999): Decay factor for the moving average of the model weights.
    clip_grad (bool, DEFAULT: False): Whether to apply gradient clipping.
    clip_value (float, DEFAULT: 15.0): The value at which to clip gradients.
    gradient_accumulation_steps (int, DEFAULT: 1): Accumulate the gradients of n batches before updating.

RETURNS
    mindspore.Model
+ Source code in mindcv/utils/trainer_factory.py +
def create_trainer(
+    network: nn.Cell,
+    loss: nn.Cell,
+    optimizer: nn.Cell,
+    metrics: Union[dict, set],
+    amp_level: str,
+    amp_cast_list: str,
+    loss_scale_type: str,
+    loss_scale: float = 1.0,
+    drop_overflow_update: bool = False,
+    ema: bool = False,
+    ema_decay: float = 0.9999,
+    clip_grad: bool = False,
+    clip_value: float = 15.0,
+    gradient_accumulation_steps: int = 1,
+):
+    """Create Trainer.
+
+    Args:
+        network: The backbone network to train, evaluate or predict.
+        loss: The function of calculating loss.
+        optimizer: The optimizer for training.
+        metrics: The metrics for model evaluation.
+        amp_level: The level of auto mixed precision (AMP) training.
+        amp_cast_list: Custom list of cells to cast to FP16 at the cell level.
+        loss_scale_type: The type of loss scale.
+        loss_scale: The value of the loss scale.
+        drop_overflow_update: Whether to drop the parameter update when a gradient overflow occurs.
+        ema: Whether to keep an exponential moving average (EMA) of the model weights.
+        ema_decay: Decay factor for the moving average of the model weights.
+        clip_grad: Whether to apply gradient clipping.
+        clip_value: The value at which to clip gradients.
+        gradient_accumulation_steps: Accumulate the gradients of n batches before updating.
+
+    Returns:
+        mindspore.Model
+
+    """
+    if loss_scale < 1.0:
+        raise ValueError("Loss scale cannot be less than 1.0!")
+
+    if drop_overflow_update is False and loss_scale_type.lower() == "dynamic":
+        raise ValueError("DynamicLossScale ALWAYS drop overflow!")
+
+    if gradient_accumulation_steps < 1:
+        raise ValueError("`gradient_accumulation_steps` must be >= 1!")
+
+    if not require_customized_train_step(ema, clip_grad, gradient_accumulation_steps, amp_cast_list):
+        mindspore_kwargs = dict(
+            network=network,
+            loss_fn=loss,
+            optimizer=optimizer,
+            metrics=metrics,
+            amp_level=amp_level,
+        )
+        if loss_scale_type.lower() == "fixed":
+            mindspore_kwargs["loss_scale_manager"] = FixedLossScaleManager(
+                loss_scale=loss_scale, drop_overflow_update=drop_overflow_update
+            )
+        elif loss_scale_type.lower() == "dynamic":
+            mindspore_kwargs["loss_scale_manager"] = DynamicLossScaleManager(
+                init_loss_scale=loss_scale, scale_factor=2, scale_window=2000
+            )
+        elif loss_scale_type.lower() == "auto":
+            # We don't explicitly construct LossScaleManager
+            _logger.warning(
+                "You are using AUTO loss scale, which means the LossScaleManager isn't explicitly pass in "
+                "when creating a mindspore.Model instance. "
+                "NOTE: mindspore.Model may use LossScaleManager silently. See mindspore.train.amp for details."
+            )
+        else:
+            raise ValueError(f"Loss scale type only supports ['fixed', 'dynamic', 'auto'], but got {loss_scale_type}.")
+        model = Model(**mindspore_kwargs)
+    else:  # require customized train step
+        eval_network = nn.WithEvalCell(network, loss, amp_level in ["O2", "O3", "auto"])
+        auto_mixed_precision(network, amp_level, amp_cast_list)
+        net_with_loss = add_loss_network(network, loss, amp_level)
+        train_step_kwargs = dict(
+            network=net_with_loss,
+            optimizer=optimizer,
+            ema=ema,
+            ema_decay=ema_decay,
+            clip_grad=clip_grad,
+            clip_value=clip_value,
+            gradient_accumulation_steps=gradient_accumulation_steps,
+        )
+        if loss_scale_type.lower() == "fixed":
+            loss_scale_manager = FixedLossScaleManager(loss_scale=loss_scale, drop_overflow_update=drop_overflow_update)
+        elif loss_scale_type.lower() == "dynamic":
+            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=loss_scale, scale_factor=2, scale_window=2000)
+        else:
+            raise ValueError(f"Loss scale type only supports ['fixed', 'dynamic'], but got {loss_scale_type}.")
+        update_cell = loss_scale_manager.get_update_cell()
+        # 1. loss_scale_type="fixed", drop_overflow_update=False
+        # --> update_cell=None, TrainStep=TrainOneStepCell(scale_sense=loss_scale)
+        # 2. loss_scale_type: fixed, drop_overflow_update: True
+        # --> update_cell=FixedLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        # 3. loss_scale_type: dynamic, drop_overflow_update: True
+        # --> update_cell=DynamicLossScaleUpdateCell, TrainStep=TrainOneStepWithLossScaleCell(scale_sense=update_cell)
+        if update_cell is None:
+            train_step_kwargs["scale_sense"] = Tensor(loss_scale, dtype=ms.float32)
+        else:
+            if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU":
+                raise ValueError(
+                    "Only `loss_scale_type=fixed` with `drop_overflow_update=False` "
+                    "is supported on device `CPU`."
+                )
+            train_step_kwargs["scale_sense"] = update_cell
+        train_step_cell = TrainStep(**train_step_kwargs).set_train()
+        model = Model(train_step_cell, eval_network=eval_network, metrics=metrics, eval_indexes=[0, 1, 2])
+        # todo: do we need to set model._loss_scale_manager
+    return model
+
+
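A minimal end-to-end sketch of calling create_trainer. The surrounding factories (create_model, create_loss, create_optimizer) are the MindCV interfaces documented elsewhere in these pages; the hyper-parameter values below are illustrative, not recommendations:

from mindcv.models import create_model
from mindcv.loss import create_loss
from mindcv.optim import create_optimizer
from mindcv.utils.trainer_factory import create_trainer

network = create_model(model_name="resnet50", num_classes=1000)
loss = create_loss(name="CE")
optimizer = create_optimizer(network.trainable_params(), opt="momentum", lr=0.1)

model = create_trainer(
    network, loss, optimizer,
    metrics={"accuracy"},
    amp_level="O2",
    amp_cast_list=None,
    loss_scale_type="dynamic",
    loss_scale=1024.0,
    drop_overflow_update=True,  # dynamic loss scale always drops overflowed updates
    ema=False,
    clip_grad=False,
    gradient_accumulation_steps=1,
)
# model.train(num_epochs, loader_train, ...)  # loader_train built with create_loader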
+
+ +
+ + + + + + +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/configuration/index.html b/zh/tutorials/configuration/index.html new file mode 100644 index 000000000..6f6964d1b --- /dev/null +++ b/zh/tutorials/configuration/index.html @@ -0,0 +1,1635 @@ + + + + + + + + + + + + + + + + + + + + + + + + 配置 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

配置

+

下载Notebook

+

MindCV套件可以通过python的argparse库和PyYAML库解析模型的yaml文件来进行参数的配置。 +下面我们以squeezenet_1.0模型为例,解释如何配置相应的参数。

+

基础环境

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    mode:使用静态图模式(0)或动态图模式(1)。

    +
  • +
  • +

    distribute:是否使用分布式。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    mode: 0
    +distribute: True
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py --mode 0 --distribute False ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    +

    args.mode代表参数mode, args.distribute代表参数distribute

    +
    +
    def train(args):
    +    ms.set_context(mode=args.mode)
    +
    +    if args.distribute:
    +        init()
    +        device_num = get_group_size()
    +        rank_id = get_rank()
    +        ms.set_auto_parallel_context(device_num=device_num,
    +                                     parallel_mode='data_parallel',
    +                                     gradients_mean=True)
    +    else:
    +        device_num = None
    +        rank_id = None
    +    ...
    +
    +
  6. +
+

数据集

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    dataset:数据集名称。

    +
  • +
  • +

    data_dir:数据集文件所在路径。

    +
  • +
  • +

    shuffle:是否进行数据混洗。

    +
  • +
  • +

    dataset_download:是否下载数据集。

    +
  • +
  • +

    batch_size:每个批处理数据包含的数据条目。

    +
  • +
  • +

    drop_remainder:当最后一个批处理数据包含的数据条目小于 batch_size 时,是否将该批处理丢弃。

    +
  • +
  • +

    num_parallel_workers:读取数据的工作线程数。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    dataset: 'imagenet'
    +data_dir: './imagenet2012'
    +shuffle: True
    +dataset_download: False
    +batch_size: 32
    +drop_remainder: True
    +num_parallel_workers: 8
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --dataset imagenet --data_dir ./imagenet2012 --shuffle True \
    +    --dataset_download False --batch_size 32 --drop_remainder True \
    +    --num_parallel_workers 8 ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    dataset_train = create_dataset(
    +        name=args.dataset,
    +        root=args.data_dir,
    +        split='train',
    +        shuffle=args.shuffle,
    +        num_samples=args.num_samples,
    +        num_shards=device_num,
    +        shard_id=rank_id,
    +        num_parallel_workers=args.num_parallel_workers,
    +        download=args.dataset_download,
    +        num_aug_repeats=args.aug_repeats)
    +
    +    ...
    +    target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None
    +
    +    loader_train = create_loader(
    +        dataset=dataset_train,
    +        batch_size=args.batch_size,
    +        drop_remainder=args.drop_remainder,
    +        is_training=True,
    +        mixup=args.mixup,
    +        cutmix=args.cutmix,
    +        cutmix_prob=args.cutmix_prob,
    +        num_classes=args.num_classes,
    +        transform=transform_list,
    +        target_transform=target_transform,
    +        num_parallel_workers=args.num_parallel_workers,
    +    )
    +    ...
    +
    +
  6. +
+

数据增强

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    image_resize:图像的输出尺寸大小。

    +
  • +
  • +

    scale:要裁剪的原始尺寸大小的各个尺寸的范围。

    +
  • +
  • +

    ratio:裁剪宽高比的范围。

    +
  • +
  • +

    hflip:图像被翻转的概率。

    +
  • +
  • +

    interpolation:图像插值方式。

    +
  • +
  • +

    crop_pct:输入图像中心裁剪百分比。

    +
  • +
  • +

    color_jitter:颜色抖动因子(亮度调整因子,对比度调整因子,饱和度调整因子)。

    +
  • +
  • +

    re_prob:执行随机擦除的概率。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    image_resize: 224
    +scale: [0.08, 1.0]
    +ratio: [0.75, 1.333]
    +hflip: 0.5
    +interpolation: 'bilinear'
    +crop_pct: 0.875
    +color_jitter: [0.4, 0.4, 0.4]
    +re_prob: 0.5
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --image_resize 224 --scale [0.08, 1.0] --ratio [0.75, 1.333] \
    +    --hflip 0.5 --interpolation "bilinear" --crop_pct 0.875 \
    +    --color_jitter [0.4, 0.4, 0.4] --re_prob 0.5 ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    transform_list = create_transforms(
    +        dataset_name=args.dataset,
    +        is_training=True,
    +        image_resize=args.image_resize,
    +        scale=args.scale,
    +        ratio=args.ratio,
    +        hflip=args.hflip,
    +        vflip=args.vflip,
    +        color_jitter=args.color_jitter,
    +        interpolation=args.interpolation,
    +        auto_augment=args.auto_augment,
    +        mean=args.mean,
    +        std=args.std,
    +        re_prob=args.re_prob,
    +        re_scale=args.re_scale,
    +        re_ratio=args.re_ratio,
    +        re_value=args.re_value,
    +        re_max_attempts=args.re_max_attempts
    +    )
    +    ...
    +
    +
  6. +
+

模型

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    model:模型名称。

    +
  • +
  • +

    num_classes:分类的类别数。

    +
  • +
  • +

    pretrained:是否加载预训练模型。

    +
  • +
  • +

    ckpt_path:参数文件所在的路径。

    +
  • +
  • +

    keep_checkpoint_max:最多保存多少个checkpoint文件。

    +
  • +
  • +

    ckpt_save_dir:保存参数文件的路径。

    +
  • +
  • +

    epoch_size:训练执行轮次。

    +
  • +
  • +

    dataset_sink_mode:数据是否直接下沉至处理器进行处理。

    +
  • +
  • +

    amp_level:混合精度等级。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    model: 'squeezenet1_0'
    +num_classes: 1000
    +pretrained: False
    +ckpt_path: './squeezenet1_0_gpu.ckpt'
    +keep_checkpoint_max: 10
    +ckpt_save_dir: './ckpt/'
    +epoch_size: 200
    +dataset_sink_mode: True
    +amp_level: 'O0'
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --model squeezenet1_0 --num_classes 1000 --pretrained False \
    +    --ckpt_path ./squeezenet1_0_gpu.ckpt --keep_checkpoint_max 10 \
    +    --ckpt_save_path ./ckpt/ --epoch_size 200 --dataset_sink_mode True \
    +    --amp_level O0 ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    network = create_model(model_name=args.model,
    +        num_classes=args.num_classes,
    +        in_channels=args.in_channels,
    +        drop_rate=args.drop_rate,
    +        drop_path_rate=args.drop_path_rate,
    +        pretrained=args.pretrained,
    +        checkpoint_path=args.ckpt_path,
    +        ema=args.ema
    +    )
    +    ...
    +
    +
  6. +
+

损失函数

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    loss:损失函数的简称。

    +
  • +
  • +

    label_smoothing:标签平滑值,用于计算Loss时防止模型过拟合的正则化手段。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    loss: 'CE'
    +label_smoothing: 0.1
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --loss CE --label_smoothing 0.1 ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    loss = create_loss(name=args.loss,
    +        reduction=args.reduction,
    +        label_smoothing=args.label_smoothing,
    +        aux_factor=args.aux_factor
    +    )
    +    ...
    +
    +
  6. +
+

学习率策略

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    scheduler:学习率策略的名称。

    +
  • +
  • +

    min_lr:学习率的最小值。

    +
  • +
  • +

    lr:学习率的最大值。

    +
  • +
  • +

    warmup_epochs:学习率warmup的轮次。

    +
  • +
  • +

    decay_epochs:进行衰减的epoch数。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    scheduler: 'cosine_decay'
    +min_lr: 0.0
    +lr: 0.01
    +warmup_epochs: 0
    +decay_epochs: 200
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --scheduler cosine_decay --min_lr 0.0 --lr 0.01 \
    +    --warmup_epochs 0 --decay_epochs 200 ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    lr_scheduler = create_scheduler(num_batches,
    +        scheduler=args.scheduler,
    +        lr=args.lr,
    +        min_lr=args.min_lr,
    +        warmup_epochs=args.warmup_epochs,
    +        warmup_factor=args.warmup_factor,
    +        decay_epochs=args.decay_epochs,
    +        decay_rate=args.decay_rate,
    +        milestones=args.multi_step_decay_milestones,
    +        num_epochs=args.epoch_size,
    +        lr_epoch_stair=args.lr_epoch_stair
    +    )
    +    ...
    +
    +
  6. +
+

优化器

+
    +
  1. 参数说明
  2. +
+
    +
  • +

    opt:优化器名称。

    +
  • +
  • +

    filter_bias_and_bn:是否将bias、gamma和beta等参数排除在权重衰减之外。

    +
  • +
  • +

    momentum:移动平均的动量。

    +
  • +
  • +

    weight_decay:权重衰减(L2 penalty)。

    +
  • +
  • +

    loss_scale:梯度缩放系数

    +
  • +
  • +

    use_nesterov:是否使用Nesterov Accelerated Gradient (NAG)算法更新梯度。

    +
  • +
+
    +
  1. +

    yaml文件样例

    +
    opt: 'momentum'
    +filter_bias_and_bn: True
    +momentum: 0.9
    +weight_decay: 0.00007
    +loss_scale: 1024
    +use_nesterov: False
    +...
    +
    +
  2. +
  3. +

    parse参数设置

    +
    python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \
    +    --loss_scale 1024 --use_nesterov False ...
    +
    +
  4. +
  5. +

    对应的代码示例

    +
    def train(args):
    +    ...
    +    if args.ema:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            loss_scale=args.loss_scale,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    else:
    +        optimizer = create_optimizer(network.trainable_params(),
    +            opt=args.opt,
    +            lr=lr_scheduler,
    +            weight_decay=args.weight_decay,
    +            momentum=args.momentum,
    +            nesterov=args.use_nesterov,
    +            filter_bias_and_bn=args.filter_bias_and_bn,
    +            checkpoint_path=opt_ckpt_path,
    +            eps=args.eps
    +        )
    +    ...
    +
    +
  6. +
+

Yaml和Parse组合使用

+

使用parse设置参数可以覆盖yaml文件中的参数设置。以下面的shell命令为例,

+
python train.py -c ./configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir ./data
+
+

上面的命令将args.data_dir参数的值由yaml文件中的 ./imagenet2012 覆盖为 ./data
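下面给出一段简化的示意代码(并非MindCV的实际实现,函数名parse_args_with_yaml仅作示意),说明“yaml文件提供默认值、命令行参数优先级更高”这一机制通常如何实现:

import argparse
import yaml

def parse_args_with_yaml():
    # 第一步:先只解析 -c/--config,得到yaml文件路径
    config_parser = argparse.ArgumentParser(add_help=False)
    config_parser.add_argument("-c", "--config", type=str, default="")
    config_args, remaining = config_parser.parse_known_args()

    # 第二步:定义完整参数及其内置默认值
    parser = argparse.ArgumentParser(parents=[config_parser])
    parser.add_argument("--mode", type=int, default=0)
    parser.add_argument("--data_dir", type=str, default="./imagenet2012")

    # 第三步:若指定了yaml文件,用其中的值覆盖内置默认值
    if config_args.config:
        with open(config_args.config) as f:
            parser.set_defaults(**yaml.safe_load(f))

    # 第四步:最后解析剩余的命令行参数,显式传入的命令行参数优先级最高
    return parser.parse_args(remaining)

args = parse_args_with_yaml()
print(args.data_dir)  # 传入 --data_dir ./data 时,将覆盖yaml中的 ./imagenet2012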

+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/deployment/index.html b/zh/tutorials/deployment/index.html new file mode 100644 index 000000000..0cb25dff8 --- /dev/null +++ b/zh/tutorials/deployment/index.html @@ -0,0 +1,1290 @@ + + + + + + + + + + + + + + + + + + + + + + + + 部署 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

部署推理服务

+

MindSpore Serving是一个轻量级、高性能的推理服务模块,旨在帮助MindSpore开发者在生产环境中高效部署在线推理服务。当用户使用MindSpore完成模型训练后,导出MindSpore模型,即可使用MindSpore Serving创建该模型的推理服务。

+

本文以mobilenet_v2_100网络为例,演示基于MindSpore Serving进行部署推理服务的方法。

+

环境准备

+

进行部署前,需确保已经正确安装了MindSpore Serving,并配置了环境变量。MindSpore Serving安装和配置可以参考MindSpore Serving安装页面

+

模型导出

+

实现跨平台或硬件执行推理(如昇腾AI处理器、MindSpore端侧、GPU等),需要通过网络定义和CheckPoint生成MindIR格式模型文件。在MindSpore中,网络模型导出的函数为export,主要参数如下所示:

+
    +
  • net:MindSpore网络结构。
  • +
  • inputs:网络的输入,支持输入类型为Tensor。当输入有多个时,需要一起传入,如ms.export(network, ms.Tensor(input1), ms.Tensor(input2), file_name='network', file_format='MINDIR')
  • +
  • file_name:导出模型的文件名称,如果file_name没有包含对应的后缀名(如.mindir),设置file_format后系统会为文件名自动添加后缀。
  • +
  • file_format:MindSpore目前支持导出”AIR”,”ONNX”和”MINDIR”格式的模型。
  • +
+

下面代码以mobilenet_v2_100为例,导出MindCV的预训练网络模型,获得MindIR格式模型文件。

+
from mindcv.models import create_model
+import numpy as np
+import mindspore as ms
+
+model = create_model(model_name='mobilenet_v2_100', num_classes=1000, pretrained=True)
+
+input_np = np.random.uniform(0.0, 1.0, size=[1, 3, 224, 224]).astype(np.float32)
+
+# 导出文件mobilenet_v2_100.mindir到当前文件夹
+ms.export(model, ms.Tensor(input_np), file_name='mobilenet_v2_100', file_format='MINDIR')
+
+

部署Serving推理服务

+

配置服务

+

启动Serving服务,执行本教程需要如下文件列表:

+
demo
+├── mobilenet_v2_100
+│   ├── 1
+│   │   └── mobilenet_v2_100.mindir
+│   └── servable_config.py
+│── serving_server.py
+├── serving_client.py
+├── imagenet1000_clsidx_to_labels.txt
+└── test_image
+    ├─ dog
+    │   ├─ dog.jpg
+    │   └─ ……
+    └─ ……
+
+
    +
  • mobilenet_v2_100为模型文件夹,文件夹名即为模型名。
  • +
  • mobilenet_v2_100.mindir为上一步网络生成的模型文件,放置在文件夹1下,1为版本号,不同的版本放置在不同的文件夹下,版本号需以纯数字串命名,默认配置下启动最大数值的版本号的模型文件。
  • +
  • servable_config.py为模型配置脚本,对模型进行声明、入参和出参定义。
  • +
  • serving_server.py为启动服务脚本文件。
  • +
  • serving_client.py为启动客户端脚本文件。
  • +
  • imagenet1000_clsidx_to_labels.txt为ImageNet数据集1000个类别的索引,可以在examples/data/中得到。
  • +
  • test_image中为测试图片,可以在README中得到。
  • +
+

其中,模型配置文件servable_config.py内容如下:

+
from mindspore_serving.server import register
+
+# 进行模型声明,其中declare_model入参model_file指示模型的文件名称,model_format指示模型的模型类别
+model = register.declare_model(model_file="mobilenet_v2_100.mindir", model_format="MindIR")
+
+# Servable方法的入参由Python方法的入参指定,Servable方法的出参由register_method的output_names指定
+@register.register_method(output_names=["score"])
+def predict(image):
+    x = register.add_stage(model, image, outputs_count=1)
+    return x
+
+

启动服务

+

MindSpore的server函数提供两种服务部署,一种是gRPC方式,一种是通过RESTful方式,本教程以gRPC方式为例。服务启动脚本serving_server.py把本地目录下的mobilenet_v2_100部署到设备0,并启动地址为127.0.0.1:5500的gRPC服务器。脚本文件内容如下:

+
import os
+import sys
+from mindspore_serving import server
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="mobilenet_v2_100",
+                                                 device_ids=0)
+    server.start_servables(servable_configs=servable_config)
+    server.start_grpc_server(address="127.0.0.1:5500")
+
+if __name__ == "__main__":
+    start()
+
+

当服务端打印如下日志时,表示Serving gRPC服务启动成功。

+
Serving gRPC server start success, listening on 127.0.0.1:5500
+
+

执行推理

+

使用serving_client.py,启动Python客户端。客户端脚本使用mindcv.data中的create_transforms、create_dataset和create_loader函数,进行图片预处理,再传送给Serving服务器。对服务器返回的结果进行后处理,打印图片的预测标签。

+
import os
+from mindspore_serving.client import Client
+import numpy as np
+from mindcv.data import create_transforms, create_dataset, create_loader
+
+num_workers = 1
+
+# 数据集目录路径
+data_dir = "./test_image/"
+
+dataset = create_dataset(root=data_dir, split='', num_parallel_workers=num_workers)
+transforms_list = create_transforms(dataset_name='ImageNet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+
+def postprocess(score):
+    max_idx = np.argmax(score)
+    return idx2label[max_idx]
+
+def predict():
+    client = Client("127.0.0.1:5500", "mobilenet_v2_100", "predict")
+    instances = []
+    images, _ = next(data_loader.create_tuple_iterator())
+    image_np = images.asnumpy().squeeze()
+    instances.append({"image": image_np})
+    result = client.infer(instances)
+
+    for instance in result:
+        label = postprocess(instance["score"])
+        print(label)
+
+if __name__ == '__main__':
+    predict()
+
+

执行后显示如下返回值,说明Serving服务已正确执行mobilenet_v2_100网络模型的推理。 +

Labrador retriever
+

+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/finetune/index.html b/zh/tutorials/finetune/index.html new file mode 100644 index 000000000..6d2f57f4b --- /dev/null +++ b/zh/tutorials/finetune/index.html @@ -0,0 +1,1706 @@ + + + + + + + + + + + + + + + + + + + + + + + + 微调 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

自定义数据集上的模型微调训练

+

下载Notebook

+

在此教程中,您将学会如何使用MindCV套件进行迁移学习,以解决自定义数据集上的图像分类问题。 +在深度学习任务中,常见遇到训练数据不足的问题,此时直接训练整个网络往往难以达到理想的精度。 +一个比较好的做法是,使用一个在大规模数据集上(与任务数据较为接近)预训练好的模型,然后使用该模型来初始化网络的权重参数或作为固定特征提取器应用于特定的任务中。

+

此教程将以使用ImageNet上预训练的DenseNet模型为例,介绍两种不同的微调策略,解决小样本情况下狼和狗的图像分类问题:

+
    +
  1. 整体模型微调。
  2. +
  3. 冻结特征网络(freeze backbone),只微调分类器。
  4. +
+
+

迁移学习详细内容见Stanford University CS231n

+
+

数据准备

+

下载数据集

+

下载案例所用到的狗与狼分类数据集, +每个类别各有120张训练图像与30张验证图像。使用mindcv.utils.download接口下载数据集,并将下载后的数据集自动解压到当前目录下。

+
import os
+from mindcv.utils.download import DownLoad
+
+dataset_url = "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip"
+root_dir = "./"
+
+if not os.path.exists(os.path.join(root_dir, 'data/Canidae')):
+    DownLoad().download_and_extract_archive(dataset_url, root_dir)
+
+

数据集的目录结构如下:

+
data/
+└── Canidae
+    ├── train
+    │   ├── dogs
+    │   └── wolves
+    └── val
+        ├── dogs
+        └── wolves
+
+

数据集加载及处理

+

自定义数据集的加载

+

通过调用mindcv.data中的create_dataset函数,我们可轻松地加载预设的数据集以及自定义的数据集。

+
    +
  • 当参数name设为空时,指定为自定义数据集。(默认值)
  • +
  • 当参数name设为MNIST, CIFAR10等标准数据集名称时,指定为预设数据集。
  • +
+

同时,我们需要设定数据集的路径data_dir和数据切分的名称split (如train, val),以加载对应的训练集或者验证集。

+
from mindcv.data import create_dataset, create_transforms, create_loader
+
+num_workers = 8
+
+# 数据集目录路径
+data_dir = "./data/Canidae/"
+
+# 加载自定义数据集
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+
+
+

注意: 自定义数据集的目录结构应与ImageNet一样,即root -> split -> class -> image 的层次结构

+
+
DATASET_NAME
+    ├── split1(e.g. train)/
+    │  ├── class1/
+    │  │   ├── 000001.jpg
+    │  │   ├── 000002.jpg
+    │  │   └── ....
+    │  └── class2/
+    │      ├── 000001.jpg
+    │      ├── 000002.jpg
+    │      └── ....
+    └── split2/
+       ├── class1/
+       │   ├── 000001.jpg
+       │   ├── 000002.jpg
+       │   └── ....
+       └── class2/
+           ├── 000001.jpg
+           ├── 000002.jpg
+           └── ....
+
+

数据处理及增强

+

首先我们通过调用create_transforms函数, 获得预设的数据处理和增强策略(transform list),此任务中,因狼狗图像和ImageNet数据一致(即domain一致),我们指定参数dataset_name为ImageNet,直接用预设好的ImageNet的数据处理和图像增强策略。create_transforms 同样支持多种自定义的处理和增强操作,以及自动增强策略(AutoAug)。详见API说明。

+

我们将得到的transform list传入create_loader(),并指定batch_size和其他参数,即可完成训练和验证数据的准备,返回Dataset Object,作为模型的输入。

+
# 定义和获取数据处理及增强操作
+trans_train = create_transforms(dataset_name='ImageNet', is_training=True)
+trans_val = create_transforms(dataset_name='ImageNet',is_training=False)
+
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+

数据集可视化

+

对于create_loader接口返回的完成数据加载的Dataset object,我们可以通过 create_tuple_iterator 接口创建数据迭代器,使用 next 迭代访问数据集,读取到一个batch的数据。

+
images, labels = next(loader_train.create_tuple_iterator())
+print("Tensor of image", images.shape)
+print("Labels:", labels)
+
+
Tensor of image (16, 3, 224, 224)
+Labels: [0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1]
+
+

对获取到的图像及标签数据进行可视化,标题为图像对应的label名称。

+
import matplotlib.pyplot as plt
+import numpy as np
+
+# class_name对应label,按文件夹字符串从小到大的顺序标记label
+class_name = {0: "dogs", 1: "wolves"}
+
+plt.figure(figsize=(15, 7))
+for i in range(len(labels)):
+    # 获取图像及其对应的label
+    data_image = images[i].asnumpy()
+    data_label = labels[i]
+    # 处理图像供展示使用
+    data_image = np.transpose(data_image, (1, 2, 0))
+    mean = np.array([0.485, 0.456, 0.406])
+    std = np.array([0.229, 0.224, 0.225])
+    data_image = std * data_image + mean
+    data_image = np.clip(data_image, 0, 1)
+    # 显示图像
+    plt.subplot(3, 6, i + 1)
+    plt.imshow(data_image)
+    plt.title(class_name[int(labels[i].asnumpy())])
+    plt.axis("off")
+
+plt.show()
+
+

png

+

模型微调

+

1. 整体模型微调

+

预训练模型加载

+

我们使用mindcv.models.densenet中定义DenseNet121网络,当接口中的pretrained参数设置为True时,可以自动下载网络权重。 +由于该预训练模型是针对ImageNet数据集中的1000个类别进行分类的,这里我们设定num_classes=2, DenseNet的classifier(即最后的FC层)输出调整为两维,此时只加载backbone的预训练权重,而classifier则使用初始值。

+
from mindcv.models import create_model
+
+network = create_model(model_name='densenet121', num_classes=2, pretrained=True)
+
+
+

DenseNet的具体结构可参见DenseNet论文

+
+

模型训练

+

使用已加载并处理好的带标签的狼和狗图像,对DenseNet网络进行微调。注意,对整体模型做微调时,应使用较小的learning rate。

+
from mindcv.loss import create_loss
+from mindcv.optim import create_optimizer
+from mindcv.scheduler import create_scheduler
+from mindspore import Model, LossMonitor, TimeMonitor
+
+# 定义优化器和损失函数
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-4)
+loss = create_loss(name='CE')
+
+# 实例化模型
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.5195528864860535
+epoch: 1 step: 10, loss is 0.2654373049736023
+epoch: 1 step: 15, loss is 0.28758567571640015
+Train epoch time: 17270.144 ms, per step time: 1151.343 ms
+epoch: 2 step: 5, loss is 0.1807008981704712
+epoch: 2 step: 10, loss is 0.1700802594423294
+epoch: 2 step: 15, loss is 0.09752683341503143
+Train epoch time: 1372.549 ms, per step time: 91.503 ms
+epoch: 3 step: 5, loss is 0.13594701886177063
+epoch: 3 step: 10, loss is 0.03628234937787056
+epoch: 3 step: 15, loss is 0.039737217128276825
+Train epoch time: 1453.237 ms, per step time: 96.882 ms
+epoch: 4 step: 5, loss is 0.014213413000106812
+epoch: 4 step: 10, loss is 0.030747078359127045
+epoch: 4 step: 15, loss is 0.0798817127943039
+Train epoch time: 1331.237 ms, per step time: 88.749 ms
+epoch: 5 step: 5, loss is 0.009510636329650879
+epoch: 5 step: 10, loss is 0.02603740245103836
+epoch: 5 step: 15, loss is 0.051846928894519806
+Train epoch time: 1312.737 ms, per step time: 87.516 ms
+epoch: 6 step: 5, loss is 0.1163717582821846
+epoch: 6 step: 10, loss is 0.02439398318529129
+epoch: 6 step: 15, loss is 0.02564268559217453
+Train epoch time: 1434.704 ms, per step time: 95.647 ms
+epoch: 7 step: 5, loss is 0.013310655951499939
+epoch: 7 step: 10, loss is 0.02289542555809021
+epoch: 7 step: 15, loss is 0.1992517113685608
+Train epoch time: 1275.935 ms, per step time: 85.062 ms
+epoch: 8 step: 5, loss is 0.015928998589515686
+epoch: 8 step: 10, loss is 0.011409260332584381
+epoch: 8 step: 15, loss is 0.008141174912452698
+Train epoch time: 1323.102 ms, per step time: 88.207 ms
+epoch: 9 step: 5, loss is 0.10395607352256775
+epoch: 9 step: 10, loss is 0.23055407404899597
+epoch: 9 step: 15, loss is 0.04896317049860954
+Train epoch time: 1261.067 ms, per step time: 84.071 ms
+epoch: 10 step: 5, loss is 0.03162381425499916
+epoch: 10 step: 10, loss is 0.13094250857830048
+epoch: 10 step: 15, loss is 0.020028553903102875
+Train epoch time: 1217.958 ms, per step time: 81.197 ms
+
+

模型评估

+

在训练完成后,我们在验证集上评估模型的精度。

+
res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

可视化模型推理结果

+

定义 visualize_model 函数,可视化模型预测。

+
import matplotlib.pyplot as plt
+import mindspore as ms
+
+def visualize_model(model, val_dl, num_classes=2):
+    # 加载验证集的数据进行验证
+    images, labels= next(val_dl.create_tuple_iterator())
+    # 预测图像类别
+    output = model.predict(images)
+    pred = np.argmax(output.asnumpy(), axis=1)
+    # 显示图像及图像的预测值
+    images = images.asnumpy()
+    labels = labels.asnumpy()
+    class_name = {0: "dogs", 1: "wolves"}
+    plt.figure(figsize=(15, 7))
+    for i in range(len(labels)):
+        plt.subplot(3, 6, i + 1)
+        # 若预测正确,显示为蓝色;若预测错误,显示为红色
+        color = 'blue' if pred[i] == labels[i] else 'red'
+        plt.title('predict:{}'.format(class_name[pred[i]]), color=color)
+        picture_show = np.transpose(images[i], (1, 2, 0))
+        mean = np.array([0.485, 0.456, 0.406])
+        std = np.array([0.229, 0.224, 0.225])
+        picture_show = std * picture_show + mean
+        picture_show = np.clip(picture_show, 0, 1)
+        plt.imshow(picture_show)
+        plt.axis('off')
+
+    plt.show()
+
+

使用微调过后的模型对验证集的狼和狗图像数据进行预测。若预测字体为蓝色表示预测正确,若预测字体为红色表示预测错误。

+
visualize_model(model, loader_val)
+
+

png

+

2. 冻结特征网络, 微调分类器

+

冻结特征网络的参数

+

首先,我们要冻结除最后一层分类器之外的所有网络层,即将相应的层参数的requires_grad属性设置为False,使其不在反向传播中计算梯度及更新参数。

+

因为mindcv.models 中所有的模型均以classifier 来标识和命名模型的分类器(即Dense层),所以通过 classifier.weightclassifier.bias 即可筛选出分类器外的各层参数,将其requires_grad属性设置为False.

+
# freeze backbone
+for param in network.get_parameters():
+    if param.name not in ["classifier.weight", "classifier.bias"]:
+        param.requires_grad = False
+
+

微调分类器

+

因为特征网络已经固定,我们不必担心训练过程会distort pretrained features,因此,相比于第一种方法,我们可以将learning rate调大一些。

+

与不冻结特征网络相比,将节约一大半训练时间,因为此时不需要计算被冻结部分参数的梯度。

+
# 加载数据集
+dataset_train = create_dataset(root=data_dir, split='train', num_parallel_workers=num_workers)
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=16,
+    is_training=True,
+    num_classes=2,
+    transform=trans_train,
+    num_parallel_workers=num_workers,
+)
+
+# 定义优化器和损失函数
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=1e-3)
+loss = create_loss(name='CE')
+
+# 实例化模型
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
+
+
epoch: 1 step: 5, loss is 0.051333948969841
+epoch: 1 step: 10, loss is 0.02043312042951584
+epoch: 1 step: 15, loss is 0.16161368787288666
+Train epoch time: 10228.601 ms, per step time: 681.907 ms
+epoch: 2 step: 5, loss is 0.002121545374393463
+epoch: 2 step: 10, loss is 0.0009798109531402588
+epoch: 2 step: 15, loss is 0.015776708722114563
+Train epoch time: 562.543 ms, per step time: 37.503 ms
+epoch: 3 step: 5, loss is 0.008056879043579102
+epoch: 3 step: 10, loss is 0.0009347647428512573
+epoch: 3 step: 15, loss is 0.028648357838392258
+Train epoch time: 523.249 ms, per step time: 34.883 ms
+epoch: 4 step: 5, loss is 0.001014217734336853
+epoch: 4 step: 10, loss is 0.0003159046173095703
+epoch: 4 step: 15, loss is 0.0007699579000473022
+Train epoch time: 508.886 ms, per step time: 33.926 ms
+epoch: 5 step: 5, loss is 0.0015687644481658936
+epoch: 5 step: 10, loss is 0.012090332806110382
+epoch: 5 step: 15, loss is 0.004598274827003479
+Train epoch time: 507.243 ms, per step time: 33.816 ms
+epoch: 6 step: 5, loss is 0.010022152215242386
+epoch: 6 step: 10, loss is 0.0066385045647621155
+epoch: 6 step: 15, loss is 0.0036080628633499146
+Train epoch time: 517.646 ms, per step time: 34.510 ms
+epoch: 7 step: 5, loss is 0.01344013586640358
+epoch: 7 step: 10, loss is 0.0008538365364074707
+epoch: 7 step: 15, loss is 0.14135593175888062
+Train epoch time: 511.513 ms, per step time: 34.101 ms
+epoch: 8 step: 5, loss is 0.01626245677471161
+epoch: 8 step: 10, loss is 0.02871556021273136
+epoch: 8 step: 15, loss is 0.010110966861248016
+Train epoch time: 545.678 ms, per step time: 36.379 ms
+epoch: 9 step: 5, loss is 0.008498094975948334
+epoch: 9 step: 10, loss is 0.2588501274585724
+epoch: 9 step: 15, loss is 0.0014278888702392578
+Train epoch time: 499.243 ms, per step time: 33.283 ms
+epoch: 10 step: 5, loss is 0.021337147802114487
+epoch: 10 step: 10, loss is 0.00829876959323883
+epoch: 10 step: 15, loss is 0.008352771401405334
+Train epoch time: 465.600 ms, per step time: 31.040 ms
+
+

模型评估

+

训练完成之后,我们在验证集上评估模型的准确率。

+
dataset_val = create_dataset(root=data_dir, split='val', num_parallel_workers=num_workers)
+loader_val = create_loader(
+    dataset=dataset_val,
+    batch_size=5,
+    is_training=True,
+    num_classes=2,
+    transform=trans_val,
+    num_parallel_workers=num_workers,
+)
+
+res = model.eval(loader_val)
+print(res)
+
+
{'accuracy': 1.0}
+
+

可视化模型预测

+

使用微调过后的模型对验证集的狼和狗图像数据进行预测。若预测字体为蓝色表示预测正确,若预测字体为红色表示预测错误。

+
visualize_model(model, loader_val)
+
+

png

+

微调后的狼狗预测结果均正确

+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/inference/index.html b/zh/tutorials/inference/index.html new file mode 100644 index 000000000..013650d7c --- /dev/null +++ b/zh/tutorials/inference/index.html @@ -0,0 +1,1288 @@ + + + + + + + + + + + + + + + + + + + + + + + + 推理 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

图像分类预测

+

下载Notebook

+

本教程介绍如何在MindCV中调用预训练模型,在测试图像上进行分类预测。

+

模型加载

+

查看全部可用的网络模型

+

通过调用mindcv.models中的registry.list_models函数,可以打印出全部网络模型的名字,一个网络在不同参数配置下的模型也会分别打印出来,例如resnet18 / resnet34 / resnet50 / resnet101 / resnet152。

+
import sys
+sys.path.append("..")
+from mindcv.models import registry
+registry.list_models()
+
+
['BiT_resnet50',
+ 'repmlp_b224',
+ 'repmlp_b256',
+ 'repmlp_d256',
+ 'repmlp_l256',
+ 'repmlp_t224',
+ 'repmlp_t256',
+ 'convit_base',
+ 'convit_base_plus',
+ 'convit_small',
+ ...
+ 'visformer_small',
+ 'visformer_small_v2',
+ 'visformer_tiny',
+ 'visformer_tiny_v2',
+ 'vit_b_16_224',
+ 'vit_b_16_384',
+ 'vit_b_32_224',
+ 'vit_b_32_384',
+ 'vit_l_16_224',
+ 'vit_l_16_384',
+ 'vit_l_32_224',
+ 'xception']
+
+

加载预训练模型

+

我们以resnet50模型为例,介绍两种使用mindcv.modelscreate_model函数进行模型checkpoint加载的方法。

+

1). 当接口中的pretrained参数设置为True时,可以自动下载网络权重。

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, pretrained=True)
+# 切换网络的执行逻辑为推理场景
+model.set_train(False)
+
+
102453248B [00:16, 6092186.31B/s]
+
+ResNet<
+  (conv1): Conv2d<input_channels=3, output_channels=64, kernel_size=(7, 7), stride=(2, 2), pad_mode=pad, padding=3, dilation=(1, 1), group=1, has_bias=False, weight_init=normal, bias_init=zeros, format=NCHW>
+  (bn1): BatchNorm2d<num_features=64, eps=1e-05, momentum=0.9, gamma=Parameter (name=bn1.gamma, shape=(64,), dtype=Float32, requires_grad=True), beta=Parameter (name=bn1.beta, shape=(64,), dtype=Float32, requires_grad=True), moving_mean=Parameter (name=bn1.moving_mean, shape=(64,), dtype=Float32, requires_grad=False), moving_variance=Parameter (name=bn1.moving_variance, shape=(64,), dtype=Float32, requires_grad=False)>
+  (relu): ReLU<>
+  (max_pool): MaxPool2d<kernel_size=3, stride=2, pad_mode=SAME>
+  ...
+  (pool): GlobalAvgPooling<>
+  (classifier): Dense<input_channels=2048, output_channels=1000, has_bias=True>
+  >
+
+

2). 当接口中的checkpoint_path参数设置为文件路径时,可以从本地加载后缀为.ckpt的模型参数文件。

+
from mindcv.models import create_model
+model = create_model(model_name='resnet50', num_classes=1000, checkpoint_path='./resnet50_224.ckpt')
+# 切换网络的执行逻辑为推理场景
+model.set_train(False)
+
+

数据准备

+

构造数据集

+

这里,我们下载一张Wikipedia的图片作为测试图片,使用mindcv.data中的create_dataset函数,为单张图片构造自定义数据集。

+
from mindcv.data import create_dataset
+num_workers = 1
+# 数据集目录路径
+data_dir = "./data/"
+dataset = create_dataset(root=data_dir, split='test', num_parallel_workers=num_workers)
+# 图像可视
+from PIL import Image
+Image.open("./data/test/dog/dog.jpg")
+
+

png

+

数据预处理

+

通过调用create_transforms函数,获得预训练模型使用的ImageNet数据集的数据处理策略(transform list)。

+

我们将得到的transform list传入create_loader函数,指定batch_size=1和其他参数,即可完成测试数据的准备,返回Dataset Object,作为模型的输入。

+
from mindcv.data import create_transforms, create_loader
+transforms_list = create_transforms(dataset_name='imagenet', is_training=False)
+data_loader = create_loader(
+    dataset=dataset,
+    batch_size=1,
+    is_training=False,
+    num_classes=1000,
+    transform=transforms_list,
+    num_parallel_workers=num_workers
+)
+
+

模型推理

+

将自定义数据集的图片传入模型,获得推理的结果。这里使用mindspore.opsSqueeze函数去除batch维度。

+
import mindspore.ops as P
+import numpy as np
+images, _ = next(data_loader.create_tuple_iterator())
+output = P.Squeeze()(model(images))
+pred = np.argmax(output.asnumpy())
+
+
with open("imagenet1000_clsidx_to_labels.txt") as f:
+    idx2label = eval(f.read())
+print('predict: {}'.format(idx2label[pred]))
+
+
predict: Labrador retriever
+
+ + + + + + + + + \ No newline at end of file diff --git a/zh/tutorials/quick_start/index.html b/zh/tutorials/quick_start/index.html new file mode 100644 index 000000000..64ba7c4b0 --- /dev/null +++ b/zh/tutorials/quick_start/index.html @@ -0,0 +1,1414 @@ + + + + + + + + + + + + + + + + + + + + + + + + 快速开始 - MindCV Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

快速入门

+

下载Notebook

+

MindCV是一个基于MindSpore开发的,致力于计算机视觉相关技术研发的开源工具箱。 +它提供大量的计算机视觉领域的经典模型和SoTA模型以及它们的预训练权重。同时,还提供了AutoAugment等SoTA算法来提高性能。 +通过解耦的模块设计,您可以轻松地将MindCV应用到您自己的CV任务中。本教程中我们将提供一个快速上手MindCV的指南。

+

本教程将以DenseNet分类模型为例,实现对CIFAR-10数据集的迁移学习,并在此流程中对MindCV各模块的用法作讲解。

+

环境准备

+

详见安装

+

数据

+

数据集

+

通过mindcv.data中的create_dataset函数,我们可以快速地读取标准数据集或自定义的数据集。

+
import os
+from mindcv.data import create_dataset, create_transforms, create_loader
+
+cifar10_dir = './datasets/cifar/cifar-10-batches-bin'  # 你的数据存放路径
+num_classes = 10  # 类别数
+num_workers = 8  # 数据读取及加载的工作线程数
+
+# 创建数据集
+dataset_train = create_dataset(
+    name='cifar10', root=cifar10_dir, split='train', shuffle=True, num_parallel_workers=num_workers
+)
+
+

数据变换

+

create_transforms函数可直接生成适配标准数据集的数据处理增强策略(transform list),包括Cifar10, ImageNet上常用的数据处理策略。

+
# 创建所需的数据增强操作的列表
+trans = create_transforms(dataset_name='cifar10', image_resize=224)
+
+

数据加载

+

通过mindcv.data.create_loader函数,进行数据转换和batch切分加载,我们需要将create_transforms返回的transform_list传入。

+
# 执行数据增强操作,生成所需数据集。
+loader_train = create_loader(dataset=dataset_train,
+                             batch_size=64,
+                             is_training=True,
+                             num_classes=num_classes,
+                             transform=trans,
+                             num_parallel_workers=num_workers)
+
+num_batches = loader_train.get_dataset_size()
+
+
+

在notebook中请避免单独重复执行create_loader所在的Cell;若需要重新加载数据,请先重新执行create_dataset,再执行create_loader。

+
+

模型创建和加载

+

使用create_model接口获得实例化的DenseNet,并加载预训练权重densenet_121_224.ckpt(ImageNet数据集训练得到)。

+
from mindcv.models import create_model
+
+# 实例化 DenseNet-121 模型并加载预训练权重。
+network = create_model(model_name='densenet121', num_classes=num_classes, pretrained=True)
+
+
+

由于CIFAR-10和ImageNet数据集所需类别数量不同,分类器参数无法共享,出现分类器参数无法加载的告警不影响微调。

+
+

损失函数

+

通过create_loss接口获得损失函数。

+
from mindcv.loss import create_loss
+
+loss = create_loss(name='CE')
+
+

学习率调度器

+

使用create_scheduler接口设置学习率策略。

+
from mindcv.scheduler import create_scheduler
+
+# 设置学习率策略
+lr_scheduler = create_scheduler(steps_per_epoch=num_batches,
+                                scheduler='constant',
+                                lr=0.0001)
+
+

优化器

+

使用create_optimizer接口创建优化器。

+
from mindcv.optim import create_optimizer
+
+# 设置优化器
+opt = create_optimizer(network.trainable_params(), opt='adam', lr=lr_scheduler)
+
+

训练

+

使用mindspore.Model接口根据用户传入的参数封装可训练的实例。

+
from mindspore import Model
+
+# 封装可训练或推理的实例
+model = Model(network, loss_fn=loss, optimizer=opt, metrics={'accuracy'})
+
+

使用mindspore.Model.train接口进行模型训练。

+
from mindspore import LossMonitor, TimeMonitor, CheckpointConfig, ModelCheckpoint
+
+# 设置在训练过程中保存网络参数的回调函数
+ckpt_save_dir = './ckpt'
+ckpt_config = CheckpointConfig(save_checkpoint_steps=num_batches)
+ckpt_cb = ModelCheckpoint(prefix='densenet121-cifar10',
+                          directory=ckpt_save_dir,
+                          config=ckpt_config)
+
+model.train(5, loader_train, callbacks=[LossMonitor(num_batches//5), TimeMonitor(num_batches//5), ckpt_cb], dataset_sink_mode=False)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:04:30.001.890 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op5273] don't support int64, reduce precision from int64 to int32.
+
+
+epoch: 1 step: 156, loss is 2.0816354751586914
+epoch: 1 step: 312, loss is 1.4474115371704102
+epoch: 1 step: 468, loss is 0.8935483694076538
+epoch: 1 step: 624, loss is 0.5588696002960205
+epoch: 1 step: 780, loss is 0.3161369860172272
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:09:20.261.851 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op16720] don't support int64, reduce precision from int64 to int32.
+
+
+Train epoch time: 416429.509 ms, per step time: 532.519 ms
+epoch: 2 step: 154, loss is 0.19752007722854614
+epoch: 2 step: 310, loss is 0.14635677635669708
+epoch: 2 step: 466, loss is 0.3511860966682434
+epoch: 2 step: 622, loss is 0.12542471289634705
+epoch: 2 step: 778, loss is 0.22351759672164917
+Train epoch time: 156746.872 ms, per step time: 200.444 ms
+epoch: 3 step: 152, loss is 0.08965137600898743
+epoch: 3 step: 308, loss is 0.22765043377876282
+epoch: 3 step: 464, loss is 0.19035443663597107
+epoch: 3 step: 620, loss is 0.06591956317424774
+epoch: 3 step: 776, loss is 0.0934530645608902
+Train epoch time: 156574.210 ms, per step time: 200.223 ms
+epoch: 4 step: 150, loss is 0.03782692924141884
+epoch: 4 step: 306, loss is 0.023876197636127472
+epoch: 4 step: 462, loss is 0.038690414279699326
+epoch: 4 step: 618, loss is 0.15388774871826172
+epoch: 4 step: 774, loss is 0.1581358164548874
+Train epoch time: 158398.108 ms, per step time: 202.555 ms
+epoch: 5 step: 148, loss is 0.06556802988052368
+epoch: 5 step: 304, loss is 0.006707251071929932
+epoch: 5 step: 460, loss is 0.02353120595216751
+epoch: 5 step: 616, loss is 0.014183484017848969
+epoch: 5 step: 772, loss is 0.09367241710424423
+Train epoch time: 154978.618 ms, per step time: 198.182 ms
+
+

评估

+

现在让我们在CIFAR-10上对刚刚训练的模型进行评估。

+
# 加载验证数据集
+dataset_val = create_dataset(name='cifar10', root=cifar10_dir, split='test', shuffle=True, num_parallel_workers=num_workers)
+
+# 执行数据增强操作,生成所需数据集。
+loader_val = create_loader(dataset=dataset_val,
+                           batch_size=64,
+                           is_training=False,
+                           num_classes=num_classes,
+                           transform=trans,
+                           num_parallel_workers=num_workers)
+
+

加载微调后的参数文件(densenet121-cifar10-5_782.ckpt)到模型。

+

根据用户传入的参数封装可推理的实例,加载验证数据集,验证微调的 DenseNet121模型精度。

+
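下面是一个简要示意(假设权重文件保存在 ./ckpt/densenet121-cifar10-5_782.ckpt,实际路径请以训练输出为准),展示如何把保存的参数文件加载回网络;若直接沿用上文训练后仍在内存中的model,可跳过这一步:

import mindspore as ms

# 示意:从本地ckpt文件读取参数字典(路径请按实际保存位置修改)
param_dict = ms.load_checkpoint("./ckpt/densenet121-cifar10-5_782.ckpt")
# 将参数加载进此前创建的network(未能匹配的参数会给出告警)
ms.load_param_into_net(network, param_dict)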
# 验证微调后的DenseNet121的精度
+acc = model.eval(loader_val, dataset_sink_mode=False)
+print(acc)
+
+
[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:24:11.927.472 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op24314] don't support int64, reduce precision from int64 to int32.
+
+
+{'accuracy': 0.951}
+
+
+[WARNING] DEVICE(1769,ffff87c70ac0,python):2022-12-21-16:25:01.871.273 [mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc:330] FilterRaisedOrReducePrecisionMatchedKernelInfo] Operator:[Default/network-WithLossCell/_loss_fn-CrossEntropySmooth/GatherD-op27139] don't support int64, reduce precision from int64 to int32.
+
+

使用YAML文件进行模型训练和验证

+

我们还可以直接使用设置好模型参数的yaml文件,通过train.py和validate.py脚本快速地对模型进行训练和验证。以下是在ImageNet上训练SqueezenetV1的示例(需要提前将ImageNet下载到相应目录下)。

+
+

详细教程请参考 使用yaml文件的教程

+
+
#  单卡训练
+python train.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --distribute False
+
+
python validate.py -c configs/squeezenet/squeezenet_1.0_gpu.yaml --data_dir /path/to/dataset --ckpt_path /path/to/ckpt
+
+ + + + + + + + + \ No newline at end of file