From fdeee39659c35b0901c6ac6aba9d49bf97a0c46c Mon Sep 17 00:00:00 2001
From: Songyuanwei <52945530+Songyuanwei@users.noreply.github.com>
Date: Tue, 4 Apr 2023 15:11:10 +0800
Subject: [PATCH] feat: add crossvit9 ckpt, yaml and ut test (#585)

---
Notes (ignored by `git am`):
- Adds the `crossvit9` model factory, its `default_cfgs["crossvit_9"]` entry
  (checkpoint URL, input_size (3, 240, 240)), an Ascend training recipe, a
  README results row, and lists the model in tests/modules/test_models.py.
- `_cfg` now also carries a default "input_size" of (3, 224, 224), which the
  crossvit_9 entry overrides.
- `crossvit9` is annotated `-> VisionTransformer` to match `crossvit15`; the
  `index` blob ids below predate that annotation.
- This copy was re-flowed from a whitespace-collapsed dump; column padding and
  indentation are reconstructed, so apply with `git apply --ignore-whitespace`
  if context lines do not match exactly.

 configs/crossvit/README.md              |  1 +
 configs/crossvit/crossvit_9_ascend.yaml | 63 +++++++++++++++++++++++++
 mindcv/models/crossvit.py               | 17 +++++++
 tests/modules/test_models.py            |  1 +
 4 files changed, 82 insertions(+)
 create mode 100644 configs/crossvit/crossvit_9_ascend.yaml

diff --git a/configs/crossvit/README.md b/configs/crossvit/README.md
index ce462237..921918cc 100644
--- a/configs/crossvit/README.md
+++ b/configs/crossvit/README.md
@@ -23,6 +23,7 @@ Our reproduced model performance on ImageNet-1K is reported as follows.
 
 | Model       | Context  | Top-1 (%) | Top-5 (%) | Params (M) | Recipe                                                                                  | Download                                                                              |
 |-------------|----------|-----------|-----------|------------|-----------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------|
+| crossvit_9  | D910x8-G | 73.56     | 91.79     | 8.55       | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_9_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_9-e74c8e18.ckpt) |
 | crossvit_15 | D910x8-G | 81.08     | 95.33     | 27.27      | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_15_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_15-eaa43c02.ckpt) |
 | crossvit_18 | D910x8-G | 81.93     | 95.75     | 43.27      | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_18-ca0a2e43.ckpt) |
 
diff --git a/configs/crossvit/crossvit_9_ascend.yaml b/configs/crossvit/crossvit_9_ascend.yaml
new file mode 100644
index 00000000..fad9401d
--- /dev/null
+++ b/configs/crossvit/crossvit_9_ascend.yaml
@@ -0,0 +1,63 @@
+# system
+mode: 0
+distribute: True
+num_parallel_workers: 8
+val_while_train: True
+
+# dataset
+dataset: 'imagenet'
+data_dir: '/path/to/imagenet'
+shuffle: True
+dataset_download: False
+batch_size: 256
+drop_remainder: True
+
+# augmentation
+image_resize: 240
+scale: [0.08, 1.0]
+ratio: [0.75, 1.333]
+hflip: 0.5
+vflip: 0.
+interpolation: 'bicubic'
+auto_augment: 'randaug-m9-mstd0.5-inc1'
+re_prob: 0.25
+mixup: 0.8
+cutmix: 1.0
+color_jitter: 0.4
+crop_pct: 0.935
+
+# model
+model: 'crossvit9'
+num_classes: 1000
+pretrained: False
+ckpt_path: ''
+keep_checkpoint_max: 10
+ckpt_save_dir: './ckpt'
+epoch_size: 300
+dataset_sink_mode: True
+amp_level: 'O2'
+drop_path_rate: 0.1
+
+# loss
+loss: 'CE'
+label_smoothing: 0.1
+
+# lr scheduler
+scheduler: 'cosine_decay'
+lr: 0.0011
+min_lr: 0.00001
+warmup_epochs: 30
+decay_epochs: 270
+decay_rate: 0.1
+
+# optimizer
+opt: 'adamw'
+weight_decay: 0.05
+filter_bias_and_bn: True
+loss_scale_type: 'dynamic'
+drop_overflow_update: True
+use_nesterov: False
+eps: 1e-8
+
+# Scheduler parameters
+lr_epoch_stair: True
diff --git a/mindcv/models/crossvit.py b/mindcv/models/crossvit.py
index 3e21825f..216485de 100644
--- a/mindcv/models/crossvit.py
+++ b/mindcv/models/crossvit.py
@@ -21,6 +21,7 @@ from .utils import load_pretrained
 
 
 __all__ = [
+    "crossvit9",
     "crossvit15",
     "crossvit18",
 ]
@@ -30,6 +31,7 @@ def _cfg(url='', **kwargs):
     return {
         'url': url,
         'num_classes': 1000,
+        "input_size": (3, 224, 224),
         'first_conv': 'patch_embed.proj',
         'classifier': 'head',
         **kwargs
@@ -37,6 +39,9 @@ def _cfg(url='', **kwargs):
 
 
 default_cfgs = {
+    "crossvit_9": _cfg(
+        url="https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_9-e74c8e18.ckpt",
+        input_size=(3, 240, 240)),
     "crossvit_15": _cfg(url="https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_15-eaa43c02.ckpt"),
     "crossvit_18": _cfg(url="https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_18-ca0a2e43.ckpt"),
 }
@@ -445,6 +450,18 @@ def construct(self, x: Tensor) -> Tensor:
         return x
 
 
+@register_model
+def crossvit9(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
+    model = VisionTransformer(img_size=[240, 224],
+                              patch_size=[12, 16], embed_dim=[128, 256], depth=[[1, 3, 0], [1, 3, 0], [1, 3, 0]],
+                              num_heads=[4, 4], mlp_ratio=[3, 3, 1], qkv_bias=True,
+                              norm_layer=nn.LayerNorm, in_channels=in_channels, num_classes=num_classes, **kwargs)
+    default_cfg = default_cfgs["crossvit_9"]
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes=num_classes, in_channels=in_channels)
+    return model
+
+
 @register_model
 def crossvit15(pretrained: bool = False, num_classes: int = 1000, in_channels=3, **kwargs) -> VisionTransformer:
     model = VisionTransformer(img_size=[240, 224],
diff --git a/tests/modules/test_models.py b/tests/modules/test_models.py
index 4a05446a..604e0c44 100644
--- a/tests/modules/test_models.py
+++ b/tests/modules/test_models.py
@@ -24,6 +24,7 @@
     "RepMLPNet_T224",
     "convit_tiny",
     "convnext_tiny",
+    "crossvit9",
     "densenet121",
     "dpn92",
     "edgenext_small",