
Commit d55563c
feat: upgrade ms to v2.0 while maintaining compatibility of v1.8 (min…
geniuspatrick authored Jun 14, 2023
1 parent dbc49e7 commit d55563c
Showing 53 changed files with 297 additions and 149 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -132,3 +132,9 @@ dmypy.json
 # IDEs
 .idea/
 .vscode/
+
+# outputs
+rank_*/
+ckpt/
+output/
+outputs/
2 changes: 1 addition & 1 deletion README.md
@@ -157,7 +157,7 @@ In contrast, [pynative mode](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advan
 **Mixed Mode**:
-[Pynative mode with ms_function ](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode for comprising flexibility and efficiency in MindSpore. To apply pynative mode with ms_function for training, please run `train_with_func.py`, e.g.,
+[PyNative mode with mindspore.jit](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode for comprising flexibility and efficiency in MindSpore. To apply pynative mode with mindspore.jit for training, please run `train_with_func.py`, e.g.,
 ```shell
 python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10
2 changes: 1 addition & 1 deletion README_CN.md
@@ -157,7 +157,7 @@ MindCV is a toolbox developed on [MindSpore](https://www.mindspore.cn/), dedicated

 **Mixed Mode**

-[Mixed mode based on ms_function](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode that balances MindSpore's efficiency and flexibility. Users can train in this mixed mode via the `train_with_func.py` file.
+[Mixed mode based on mindspore.jit](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode that balances MindSpore's efficiency and flexibility. Users can train in this mixed mode via the `train_with_func.py` file.

 ```shell
 python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10
2 changes: 1 addition & 1 deletion docs/en/index.md
@@ -154,7 +154,7 @@ It is easy to train your model on a standard or customized dataset using `train.
 !!! warning "Mixed Mode"
-    [Pynative mode with ms_function](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode for comprising flexibility and efficiency in MindSpore. To apply pynative mode with ms_function for training, please run `train_with_func.py`, e.g.,
+    [PyNative mode with mindspore.jit](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode for comprising flexibility and efficiency in MindSpore. To apply pynative mode with mindspore.jit for training, please run `train_with_func.py`, e.g.,
 ```shell
 python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10
2 changes: 1 addition & 1 deletion docs/zh/index.md
@@ -153,7 +153,7 @@ MindCV is a toolbox developed on [MindSpore](https://www.mindspore.cn/), dedicated

 !!! warning "Mixed Mode"

-    [Mixed mode based on ms_function](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode that balances MindSpore's efficiency and flexibility. Users can train in this mixed mode via the `train_with_func.py` file.
+    [Mixed mode based on mindspore.jit](https://www.mindspore.cn/tutorials/zh-CN/r1.8/advanced/pynative_graph/combine.html) is a mixed mode that balances MindSpore's efficiency and flexibility. Users can train in this mixed mode via the `train_with_func.py` file.

 ```shell
 python train_with_func.py --model=resnet50 --dataset=cifar10 --dataset_download --epoch_size=10
11 changes: 8 additions & 3 deletions examples/train_parallel_with_func_example.py
@@ -14,7 +14,12 @@
 from mindcv.loss import create_loss
 from mindcv.models import create_model
 from mindcv.optim import create_optimizer
-from mindcv.utils import Allreduce
+from mindcv.utils import AllReduceSum
+
+try:
+    from mindspore import jit
+except ImportError:
+    from mindspore import ms_function as jit


 def main():
@@ -117,7 +122,7 @@ def forward_fn(data, label):
     grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)

     # Define function of one-step training,
-    @ms.ms_function
+    @jit
     def train_step_parallel(data, label):
         (loss, _), grads = grad_fn(data, label)
         grads = grad_reducer(grads)
@@ -143,7 +148,7 @@ def test_epoch(network, dataset):
             correct += (pred.argmax(1) == label).asnumpy().sum()
         else:  # one-hot or soft label
             correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()
-    all_reduce = Allreduce()
+    all_reduce = AllReduceSum()
     correct = all_reduce(Tensor(correct, ms.float32))
     total = all_reduce(Tensor(total, ms.float32))
     correct /= total
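A note on the `Allreduce` to `AllReduceSum` rename above: the helper lives in `mindcv.utils`, whose source is not part of the hunks shown here. Below is only a rough sketch, assuming it is a thin cell around `ops.AllReduce` with an explicit sum reduction; the names and structure are assumptions, not the repo's exact code.

```python
import mindspore.nn as nn
import mindspore.ops as ops


class AllReduceSum(nn.Cell):
    """Sum a tensor across all devices in the communication group."""

    def __init__(self):
        super().__init__()
        # ReduceOp.SUM makes the reduction explicit, which the new name reflects.
        self.all_reduce = ops.AllReduce(ops.ReduceOp.SUM)

    def construct(self, x):
        return self.all_reduce(x)
```

The explicit name matters here because the metric aggregation in `test_epoch` relies on summing `correct` and `total` across ranks before computing the accuracy.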
7 changes: 6 additions & 1 deletion examples/train_with_func_example.py
@@ -12,6 +12,11 @@
 from mindcv.models import create_model
 from mindcv.optim import create_optimizer

+try:
+    from mindspore import jit
+except ImportError:
+    from mindspore import ms_function as jit
+

 def main():
     ms.set_seed(1)
@@ -96,7 +101,7 @@ def forward_fn(data, label):
     grad_fn = ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

     # Define function of one-step training,
-    @ms.ms_function
+    @jit
     def train_step(data, label):
         (loss, _), grads = grad_fn(data, label)
         loss = ops.depend(loss, optimizer(grads))
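The try/except import added in both example scripts is the whole graph-compilation shim: MindSpore 2.0 renamed `ms_function` to `jit`, so binding whichever name exists to `jit` lets the same decorator run on 1.8 and 2.0 alike. A minimal self-contained usage sketch (the decorated function is illustrative, not from the repo):

```python
import mindspore as ms

try:
    from mindspore import jit  # MindSpore >= 2.0
except ImportError:
    from mindspore import ms_function as jit  # MindSpore 1.8 fallback


@jit  # compiled into a graph on first call, then reused
def scaled_sum(x, y):
    return 0.5 * (x + y)


print(scaled_sum(ms.Tensor(2.0), ms.Tensor(4.0)))  # expect 3.0
```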
21 changes: 11 additions & 10 deletions mindcv/models/cait.py
@@ -13,6 +13,7 @@
 from mindspore.common.initializer import TruncatedNormal

 from .helpers import load_pretrained
+from .layers.compatibility import Dropout
 from .layers.drop_path import DropPath
 from .layers.mlp import Mlp
 from .layers.patch_embed import PatchEmbed
@@ -41,12 +42,12 @@ def _cfg(url='', **kwargs):

 default_cfgs = {
     "cait_xxs24_224": _cfg(url=''),
-    "cait_xs24_384": _cfg(url=''),
+    "cait_xs24_384": _cfg(url='', input_size=(3, 384, 384)),
     "cait_s24_224": _cfg(url=''),
-    "cait_s24_384": _cfg(url=''),
-    "cait_s36_384": _cfg(url=''),
-    "cait_m36_384": _cfg(url=''),
-    "cait_m48_448": _cfg(url=''),
+    "cait_s24_384": _cfg(url='', input_size=(3, 384, 384)),
+    "cait_s36_384": _cfg(url='', input_size=(3, 384, 384)),
+    "cait_m36_384": _cfg(url='', input_size=(3, 384, 384)),
+    "cait_m48_448": _cfg(url='', input_size=(3, 448, 448)),
 }


@@ -67,9 +68,9 @@ def __init__(self,
         self.q = nn.Dense(dim, dim, has_bias=qkv_bias)
         self.k = nn.Dense(dim, dim, has_bias=qkv_bias)
         self.v = nn.Dense(dim, dim, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(1 - attn_drop_rate)
+        self.attn_drop = Dropout(p=attn_drop_rate)
         self.proj = nn.Dense(dim, dim)
-        self.proj_drop = nn.Dropout(1 - proj_drop_rate)
+        self.proj_drop = Dropout(p=proj_drop_rate)
         self.softmax = nn.Softmax(axis=-1)

         self.attn_matmul_v = ops.BatchMatMul()
@@ -156,14 +157,14 @@ def __init__(self,
         self.scale = qk_scale or head_dim ** -0.5

         self.qkv = nn.Dense(dim, dim * 3, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(1 - attn_drop_rate)
+        self.attn_drop = Dropout(p=attn_drop_rate)

         self.proj = nn.Dense(dim, dim, has_bias=False)

         self.proj_l = nn.Dense(num_heads, num_heads, has_bias=False)
         self.proj_w = nn.Dense(num_heads, num_heads, has_bias=False)

-        self.proj_drop = nn.Dropout(1 - proj_drop_rate)
+        self.proj_drop = Dropout(p=proj_drop_rate)

         self.softmax = nn.Softmax(axis=-1)

@@ -271,7 +272,7 @@ def __init__(self,
         zeros = ops.Zeros()
         self.cls_token = Parameter(zeros((1, 1, embed_dim), ms.float32))
         self.pos_embed = Parameter(zeros((1, num_patches, embed_dim), ms.float32))
-        self.pos_drop = nn.Dropout(1 - drop_rate)
+        self.pos_drop = Dropout(p=drop_rate)

         dpr = [drop_path_rate for i in range(depth)]

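Every model file in this commit swaps `nn.Dropout(keep_prob=...)` for a `Dropout` imported from `.layers.compatibility`. The reason is an API break: `nn.Dropout` in MindSpore 1.8 takes the keep probability `keep_prob`, while 2.0 takes the drop probability `p`. The compatibility module itself is not shown in this diff, so the following is only a minimal sketch of such a shim, assuming signature inspection is used to pick the right keyword; the repo's actual implementation may differ.

```python
import inspect

import mindspore.nn as nn

# MindSpore >= 2.0 exposes nn.Dropout(p=...); 1.8 only has nn.Dropout(keep_prob=...).
_HAS_P = "p" in inspect.signature(nn.Dropout.__init__).parameters


class Dropout(nn.Dropout):
    """nn.Dropout with a version-independent drop-probability argument."""

    def __init__(self, p: float = 0.5):
        if _HAS_P:
            super().__init__(p=p)
        else:
            super().__init__(keep_prob=1.0 - p)
```

With a shim like this in place, call sites read identically on both versions, e.g. `Dropout(p=attn_drop_rate)` as in the hunks above.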
16 changes: 7 additions & 9 deletions mindcv/models/coat.py
@@ -10,10 +10,11 @@
 import mindspore.common.initializer as init
 import mindspore.nn as nn
 import mindspore.ops as ops
-from mindspore import Tensor, ms_function
+from mindspore import Tensor
 from mindspore.numpy import split

 from .helpers import load_pretrained
+from .layers.compatibility import Dropout, Interpolate
 from .layers.drop_path import DropPath
 from .layers.identity import Identity
 from .registry import register_model
@@ -70,7 +71,7 @@ def __init__(
         self.fc1 = nn.Dense(in_channels=in_features, out_channels=hidden_features, has_bias=True)
         self.act = nn.GELU(approximate=False)
         self.fc2 = nn.Dense(in_channels=hidden_features, out_channels=out_features, has_bias=True)
-        self.drop = nn.Dropout(keep_prob=1.0 - drop)
+        self.drop = Dropout(p=drop)

     def construct(self, x: Tensor) -> Tensor:
         x = self.fc1(x)
@@ -118,7 +119,6 @@ def __init__(
         self.idx1 = self.channel_splits[0]
         self.idx2 = self.channel_splits[0] + self.channel_splits[1]

-    @ms_function
     def construct(self, q, v, size) -> Tensor:

         B, h, N, Ch = q.shape
@@ -167,9 +167,9 @@ def __init__(
         self.q = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
         self.k = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
         self.v = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(keep_prob=1 - attn_drop)
+        self.attn_drop = Dropout(p=attn_drop)
         self.proj = nn.Dense(dim, dim)
-        self.proj_drop = nn.Dropout(keep_prob=1 - proj_drop)
+        self.proj_drop = Dropout(p=proj_drop)
         self.softmax = nn.Softmax(axis=-1)
         self.batch_matmul = ops.BatchMatMul()

@@ -323,6 +323,7 @@ def __init__(
             shared_crpe=shared_crpes[3]
         )
         self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
+        self.interpolate_fn = Interpolate(mode="bilinear", align_corners=True)

         self.norm22 = nn.LayerNorm((dims[1],), epsilon=1e-6)
         self.norm23 = nn.LayerNorm((dims[2],), epsilon=1e-6)
@@ -349,10 +350,7 @@ def interpolate(self, x, output_size, size) -> Tensor:

         img_tokens = ops.transpose(img_tokens, (0, 2, 1))
         img_tokens = ops.reshape(img_tokens, (B, C, H, W))
-        img_tokens = ops.interpolate(img_tokens,
-                                     sizes=output_size,
-                                     mode='bilinear'
-                                     )
+        img_tokens = self.interpolate_fn(img_tokens, size=output_size)
         img_tokens = ops.reshape(img_tokens, (B, C, -1))
         img_tokens = ops.transpose(img_tokens, (0, 2, 1))

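coat.py above (and crossvit.py below) stop calling `ops.interpolate(..., sizes=...)` directly and route through an `Interpolate` cell from `.layers.compatibility`, because MindSpore 2.0 renamed the target-shape argument from `sizes` to `size` and exposed `align_corners` as an explicit flag. The shim's source is not in the shown hunks; what follows is a hedged sketch of one way to write it, keyed off the installed version, and not necessarily the repo's exact code.

```python
import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops

# Crude major.minor check; assumes a plain "X.Y.Z"-style version string.
_MS2 = tuple(int(v) for v in ms.__version__.split(".")[:2]) >= (2, 0)


class Interpolate(nn.Cell):
    """Resize wrapper hiding the 1.8 -> 2.0 ops.interpolate API change."""

    def __init__(self, mode: str = "bilinear", align_corners: bool = True):
        super().__init__()
        self.mode = mode
        self.align_corners = align_corners

    def construct(self, x, size):
        if _MS2:
            # MindSpore 2.0: target shape is `size`, align_corners is a flag.
            return ops.interpolate(x, size=size, mode=self.mode,
                                   align_corners=self.align_corners)
        # MindSpore 1.8: target shape is `sizes`; corner alignment comes from
        # the default coordinate_transformation_mode ("align_corners").
        return ops.interpolate(x, sizes=size, mode=self.mode)
```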
11 changes: 6 additions & 5 deletions mindcv/models/convit.py
@@ -11,6 +11,7 @@
 from mindspore.ops import constexpr

 from .helpers import load_pretrained
+from .layers.compatibility import Dropout
 from .layers.drop_path import DropPath
 from .layers.identity import Identity
 from .layers.mlp import Mlp
@@ -85,10 +86,10 @@ def __init__(
         self.k = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
         self.v = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)

-        self.attn_drop = nn.Dropout(keep_prob=1.0 - attn_drop)
+        self.attn_drop = Dropout(p=attn_drop)
         self.proj = nn.Dense(in_channels=dim, out_channels=dim)
         self.pos_proj = nn.Dense(in_channels=3, out_channels=num_heads)
-        self.proj_drop = nn.Dropout(keep_prob=1.0 - proj_drop)
+        self.proj_drop = Dropout(p=proj_drop)
         self.gating_param = Parameter(ops.ones((num_heads), ms.float32))
         self.softmax = nn.Softmax(axis=-1)
         self.batch_matmul = ops.BatchMatMul()
@@ -144,9 +145,9 @@ def __init__(
         self.q = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
         self.k = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
         self.v = nn.Dense(in_channels=dim, out_channels=dim, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(keep_prob=1.0 - attn_drop)
+        self.attn_drop = Dropout(p=attn_drop)
         self.proj = nn.Dense(in_channels=dim, out_channels=dim)
-        self.proj_drop = nn.Dropout(keep_prob=1.0 - proj_drop)
+        self.proj_drop = Dropout(p=proj_drop)
         self.softmax = nn.Softmax(axis=-1)
         self.batch_matmul = ops.BatchMatMul()

@@ -261,7 +262,7 @@ def __init__(
         self.num_patches = self.patch_embed.num_patches

         self.cls_token = Parameter(ops.Zeros()((1, 1, embed_dim), ms.float32))
-        self.pos_drop = nn.Dropout(keep_prob=1.0 - drop_rate)
+        self.pos_drop = Dropout(p=drop_rate)

         if self.use_pos_embed:
             self.pos_embed = Parameter(ops.Zeros()((1, self.num_patches, embed_dim), ms.float32))
15 changes: 8 additions & 7 deletions mindcv/models/crossvit.py
@@ -14,6 +14,7 @@
 from mindspore.common.initializer import TruncatedNormal

 from .helpers import load_pretrained
+from .layers.compatibility import Dropout, Interpolate
 from .layers.drop_path import DropPath
 from .layers.helpers import to_2tuple
 from .layers.identity import Identity
@@ -55,9 +56,9 @@ def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.)
         self.scale = head_dim ** -0.5

         self.qkv = nn.Dense(dim, dim * 3, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(1.0 - attn_drop)
+        self.attn_drop = Dropout(p=attn_drop)
         self.proj = nn.Dense(dim, dim)
-        self.proj_drop = nn.Dropout(1.0 - proj_drop)
+        self.proj_drop = Dropout(p=proj_drop)

     def construct(self, x: Tensor) -> Tensor:
         B, N, C = x.shape
@@ -157,9 +158,9 @@ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.
         self.wq = nn.Dense(dim, dim, has_bias=qkv_bias)
         self.wk = nn.Dense(dim, dim, has_bias=qkv_bias)
         self.wv = nn.Dense(dim, dim, has_bias=qkv_bias)
-        self.attn_drop = nn.Dropout(1.0 - attn_drop)
+        self.attn_drop = Dropout(p=attn_drop)
         self.proj = nn.Dense(dim, dim)
-        self.proj_drop = nn.Dropout(1.0 - proj_drop)
+        self.proj_drop = Dropout(p=proj_drop)

     def construct(self, x: Tensor) -> Tensor:
         B, N, C = x.shape # 3,3,16
@@ -325,6 +326,7 @@ def __init__(self, img_size=(224, 224), patch_size=(8, 16), in_channels=3, num_c

         num_patches = _compute_num_patches(img_size, patch_size)
         self.num_branches = len(patch_size)
+        self.interpolate = Interpolate(mode="bilinear", align_corners=True)

         patch_embed = []
         if hybrid_backbone is None:
@@ -346,7 +348,7 @@ def __init__(self, img_size=(224, 224), patch_size=(8, 16), in_channels=3, num_c
             d.append(c)
         d = tuple(d)
         self.cls_token = ms.ParameterTuple(d)
-        self.pos_drop = nn.Dropout(1.0 - drop_rate)
+        self.pos_drop = Dropout(p=drop_rate)

         total_depth = sum([sum(x[-2:]) for x in depth])
         dpr = np.linspace(0, drop_path_rate, total_depth) # stochastic depth decay rule
@@ -403,8 +405,7 @@ def forward_features(self, x: Tensor) -> Tensor:
         xs = []
         # print(x)
         for i in range(self.num_branches):
-            x_ = ops.interpolate(x, sizes=(self.img_size[i], self.img_size[i]), mode='bilinear') if H != self.img_size[
-                i] else x
+            x_ = self.interpolate(x, size=(self.img_size[i], self.img_size[i])) if H != self.img_size[i] else x
             tmp = self.patch_embed[i](x_)
             z = self.cls_token[i].shape
             y = Tensor(np.ones((B, z[1], z[2])), dtype=mstype.float32)
3 changes: 2 additions & 1 deletion mindcv/models/densenet.py
@@ -11,6 +11,7 @@
 from mindspore import Tensor, nn, ops

 from .helpers import load_pretrained
+from .layers.compatibility import Dropout
 from .layers.pooling import GlobalAvgPooling
 from .registry import register_model

@@ -61,7 +62,7 @@ def __init__(
         self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, pad_mode="pad", padding=1)

         self.drop_rate = drop_rate
-        self.dropout = nn.Dropout(keep_prob=1 - self.drop_rate)
+        self.dropout = Dropout(p=self.drop_rate)

     def construct(self, features: Tensor) -> Tensor:
         bottleneck = self.conv1(self.relu1(self.norm1(features)))
(Diff truncated: the remaining changed files in this commit are not shown.)
