Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

请问在 ModelArts 上怎么配置,里边预训练模型这些在哪儿下? #276

Open
marvellee1 opened this issue Dec 8, 2022 · 5 comments

Comments

@marvellee1
Copy link

%8R Z B7HF@${WBRD_ JWTB
C(_F7Q{RAQQ{UXW(TPM34PL

@zhangjiajin
Copy link
Member

请给出更详细的日志。

@marvellee1
Copy link
Author

(MindSpore) [ma-user examples]$vega ./nas/sp_nas/spnas_md.yml -b m -d NPU
2022-12-08 19:13:41.377 INFO ------------------------------------------------
2022-12-08 19:13:41.377 INFO task id: 1208.191334.031
2022-12-08 19:13:41.377 INFO ------------------------------------------------
2022-12-08 19:13:41.379 INFO configure: {
"general": {
"backend": "m",
"device_category": "NPU",
"cluster": {}
},
"pipeline": [
"serial"
],
"serial": {
"pipe_step": {
"type": "SearchPipeStep"
},
"search_algorithm": {
"type": "SpNasS",
"max_sample": 1,
"objective_keys": "AP50"
},
"search_space": {
"type": "SearchSpace",
"hyperparameters": [
{
"key": "network.backbone.code",
"type": "CATEGORY",
"range": [
"111-2111-211111-211"
]
}
]
},
"model": {
"model_desc": {
"type": "Faster_Rcnn_MD"
}
},
"trainer": {
"type": "SpNasTrainerCallback",
"epochs": 6,
"get_train_metric_after_epoch": false,
"model_statistics": false,
"is_detection_trainer": true,
"perfs_cmp_key": "AP50",
"optimizer": {
"type": "SGD",
"params": {
"lr": 0.03,
"momentum": 0.9,
"weight_decay": 0.0001
}
},
"lr_scheduler": {
"type": "WarmupScheduler",
"by_epoch": false,
"params": {
"warmup_type": "linear",
"warmup_iters": 2000,
"warmup_ratio": 0.001,
"after_scheduler_config": {
"type": "MultiStepLR",
"by_epoch": true,
"params": {
"milestones": [
10,
20
],
"gamma": 0.1
}
}
}
},
"loss": {
"type": "SumLoss"
},
"metric": {
"type": "coco",
"params": {
"anno_path": "/home/ma-user/work/cache/datasets/COCO2017/annotations/instances_val2017.json"
}
}
},
"dataset": {
"type": "CocoDataset",
"common": {
"batch_size": 16,
"num_parallel_workers": 1,
"flip_ratio": 0.5,
"expand_ratio": 1.0,
"img_width": 700,
"img_height": 512,
"keep_ratio": true,
"device_id": 0,
"device_num": 1,
"rank_id": 0,
"python_multiprocessing": true,
"coco_root": "/home/ma-user/work/cache/datasets/COCO2017",
"mindrecord_dir": "/home/ma-user/work/cache/MindRecord_COCO_TRAIN",
"instance_set": "annotations/instances_{}.json",
"coco_classes": [
"target",
"fault"
],
"num_classes": 3
},
"train": {
"train_data_type": "train2017"
},
"val": {
"val_data_type": "val2017",
"test_batch_size": 64
}
}
},
"parallel": {
"pipe_step": {
"type": "SearchPipeStep",
"models_folder": "{local_base_path}/output/serial/",
"pretrained_folder": "{local_base_path}/output/serial/"
},
"search_algorithm": {
"type": "SpNasP"
},
"search_space": {
"type": "SearchSpace",
"hyperparameters": [
{
"key": "network.neck.code",
"type": "CATEGORY",
"range": [
[
0,
1,
2,
3
]
]
}
]
},
"model": {
"model_desc": {
"type": "Faster_Rcnn_MD"
}
},
"trainer": {
"ref": "serial.trainer",
"type": "SpNasTrainerCallback",
"epochs": 6,
"get_train_metric_after_epoch": false,
"model_statistics": false,
"is_detection_trainer": true,
"perfs_cmp_key": "AP50",
"optimizer": {
"type": "SGD",
"params": {
"lr": 0.03,
"momentum": 0.9,
"weight_decay": 0.0001
}
},
"lr_scheduler": {
"type": "WarmupScheduler",
"by_epoch": false,
"params": {
"warmup_type": "linear",
"warmup_iters": 2000,
"warmup_ratio": 0.001,
"after_scheduler_config": {
"type": "MultiStepLR",
"by_epoch": true,
"params": {
"milestones": [
10,
20
],
"gamma": 0.1
}
}
}
},
"loss": {
"type": "SumLoss"
},
"metric": {
"type": "coco",
"params": {
"anno_path": "/home/ma-user/work/cache/datasets/COCO2017/annotations/instances_val2017.json"
}
}
},
"dataset": {
"ref": "serial.dataset",
"type": "CocoDataset",
"common": {
"batch_size": 16,
"num_parallel_workers": 1,
"flip_ratio": 0.5,
"expand_ratio": 1.0,
"img_width": 700,
"img_height": 512,
"keep_ratio": true,
"device_id": 0,
"device_num": 1,
"rank_id": 0,
"python_multiprocessing": true,
"coco_root": "/home/ma-user/work/cache/datasets/COCO2017",
"mindrecord_dir": "/home/ma-user/work/cache/MindRecord_COCO_TRAIN",
"instance_set": "annotations/instances_{}.json",
"coco_classes": [
"target",
"fault"
],
"num_classes": 3
},
"train": {
"train_data_type": "train2017"
},
"val": {
"val_data_type": "val2017",
"test_batch_size": 64
}
}
},
"fullytrain": {
"pipe_step": {
"type": "TrainPipeStep",
"models_folder": "{local_base_path}/output/parallel/",
"pretrained_folder": "{local_base_path}/output/parallel/"
},
"trainer": {
"ref": "serial.trainer",
"epochs": 24,
"type": "SpNasTrainerCallback",
"get_train_metric_after_epoch": false,
"model_statistics": false,
"is_detection_trainer": true,
"perfs_cmp_key": "AP50",
"optimizer": {
"type": "SGD",
"params": {
"lr": 0.03,
"momentum": 0.9,
"weight_decay": 0.0001
}
},
"lr_scheduler": {
"type": "WarmupScheduler",
"by_epoch": false,
"params": {
"warmup_type": "linear",
"warmup_iters": 2000,
"warmup_ratio": 0.001,
"after_scheduler_config": {
"type": "MultiStepLR",
"by_epoch": true,
"params": {
"milestones": [
10,
20
],
"gamma": 0.1
}
}
}
},
"loss": {
"type": "SumLoss"
},
"metric": {
"type": "coco",
"params": {
"anno_path": "/home/ma-user/work/cache/datasets/COCO2017/annotations/instances_val2017.json"
}
}
},
"dataset": {
"ref": "serial.dataset",
"type": "CocoDataset",
"common": {
"batch_size": 16,
"num_parallel_workers": 1,
"flip_ratio": 0.5,
"expand_ratio": 1.0,
"img_width": 700,
"img_height": 512,
"keep_ratio": true,
"device_id": 0,
"device_num": 1,
"rank_id": 0,
"python_multiprocessing": true,
"coco_root": "/home/ma-user/work/cache/datasets/COCO2017",
"mindrecord_dir": "/home/ma-user/work/cache/MindRecord_COCO_TRAIN",
"instance_set": "annotations/instances_{}.json",
"coco_classes": [
"target",
"fault"
],
"num_classes": 3
},
"train": {
"train_data_type": "train2017"
},
"val": {
"val_data_type": "val2017",
"test_batch_size": 64
}
}
},
"abs_path": true
}
2022-12-08 19:13:41.379 INFO ------------------------------------------------
{}
2022-12-08 19:13:42.534 INFO ------------------------------------------------
2022-12-08 19:13:42.534 INFO Step: serial
2022-12-08 19:13:42.534 INFO ------------------------------------------------
2022-12-08 19:13:42.547 INFO Serial-level Sample1: expend -> swap -> expend. Success.
2022-12-08 19:13:42.548 INFO desc:{'network.backbone.code': '1111-211-12111111112-11'}
2022-12-08 19:13:42.575 INFO submit trainer, id=1
2022-12-08 19:13:42.577 INFO Run train/val in mode: 0.
2022-12-08 19:13:42.578 INFO minspore context, mode: 0, target: Ascend, device_id: 0
2022-12-08 19:13:42.578 INFO DEVICE_ID: 0
2022-12-08 19:13:42.578 INFO Dataset_sink_mode:True.
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.413.059 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.416.528 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.432.797 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.436.952 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.453.311 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.457.830 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.472.690 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.478.160 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.498.580 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.511.148 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.511.647 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.513.333 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.513.772 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.515.253 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:43.942.775 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:44.232.465 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:44.301.238 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
[WARNING] ME(119171:281472820496960,MainProcess):2022-12-08-19:13:44.304.187 [mindspore/common/tensor.py:1637] WARN_DEPRECATED: The usage of to_tensor is deprecated. Please use init_data
2022-12-08 19:13:44.327 INFO Model was created.
CHECKING MINDRECORD FILES ...
[WARNING] PIPELINE(119171,ffff7f7aca40,python3.7):2022-12-08-19:13:46.765.392 [mindspore/ccsrc/pipeline/jit/pipeline.cc:173] CheckArgValid] The data types of Tensor:[[ True False False ... False False False]
[ True False False ... False False False]
[ True False False ... False False False]
...
[ True False False ... False False False]
[ True False False ... False False False]
[ True False False ... False False False]] is bool, which may cause SelectKernelInfo failure for operator [AddN]. For more details, please refer to the FAQ at https://www.mindspore.cn.
[ERROR] ANALYZER(119171,ffff7f7aca40,python3.7):2022-12-08-19:14:10.941.108 [mindspore/ccsrc/pipeline/jit/static_analysis/async_eval_result.cc:66] HandleException] Exception happened, check the information as below.

The function call stack (See file '/home/ma-user/work/vega-master/examples/rank_0/om/analyze_fail.dat' for more details):

0 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/algorithms/nas/sp_nas/src/network_define.py(155)

    if self.reduce_flag:

1 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/algorithms/nas/sp_nas/src/network_define.py(154)

    grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
            ^

2 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/algorithms/nas/sp_nas/src/network_define.py(107)

    loss1, loss2, loss3, loss4, loss5, loss6 = self._backbone(x, img_shape, gt_bboxe, gt_label, gt_num)
                                               ^

3 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/networks/mindspore/faster_rcnn/faster_rcnn_resnet.py(267)

    if self.training:

4 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/networks/mindspore/faster_rcnn/faster_rcnn_resnet.py(330)

                                   self.cast(x[0], mstype.float32),
                                             ^

5 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/networks/mindspore/faster_rcnn/fpn_neck.py(102)

    for i in range(self.fpn_layer):

6 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/networks/mindspore/faster_rcnn/fpn_neck.py(111)

    for i in range(self.fpn_layer - 1, -1, -1):
    ^

7 In file /home/ma-user/.local/lib/python3.7/site-packages/vega/networks/mindspore/faster_rcnn/fpn_neck.py(106)

    y = y + (x[2] + self.interpolate1(y[self.fpn_layer - 4]),)
             ^

2022-12-08 19:14:11.8 ERROR Failed to run worker, id: 1, message: For 'Add', x.shape and y.shape are supposed to broadcast, where broadcast means that x.shape[i] = 1 or -1 or y.shape[i] = 1 or -1 or x.shape[i] = y.shape[i], but now x.shape and y.shape can not broadcast, got i: -2, x.shape: [16, 256, 32, 44], y.shape: [16, 256, 48, 80].
2022-12-08 19:14:11.59 INFO Update Success. step_name=serial, worker_id=1
2022-12-08 19:14:11.59 INFO waiting for the workers [1] to finish
2022-12-08 19:14:11.60 INFO Best values: []
2022-12-08 19:14:11.62 WARNING Failed to dump pareto front records, report is emplty.
2022-12-08 19:14:13.74 INFO ------------------------------------------------
2022-12-08 19:14:13.74 INFO Pipeline end.
2022-12-08 19:14:13.74 INFO
2022-12-08 19:14:13.75 INFO task id: 1208.191334.031
2022-12-08 19:14:13.75 INFO output folder: /home/ma-user/work/vega-master/examples/tasks/1208.191334.031/output
2022-12-08 19:14:13.75 INFO
2022-12-08 19:14:13.75 INFO running time:
2022-12-08 19:14:13.76 INFO serial: 0:00:28 [2022-12-08 19:13:42.540940 - 2022-12-08 19:14:11.072154]
2022-12-08 19:14:13.76 INFO
2022-12-08 19:14:13.80 INFO result file output.csv is not existed or empty
2022-12-08 19:14:13.80 INFO ------------------------------------------------
2022-12-08 19:14:16.178 INFO Shutdown urgently.

Exception ignored in: <function _PythonMultiprocessing.__del__ at 0xffff1a6b6710>
Traceback (most recent call last):
File "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.7/site-packages/mindspore/dataset/engine/datasets.py", line 3165, in __del__
File "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.7/site-packages/mindspore/dataset/engine/datasets.py", line 2841, in terminate
TypeError: 'NoneType' object is not callable

@zhangjiajin
Copy link
Member

mindspore的版本号是多少?

@marvellee1
Copy link
Author

1.7.0
0V))JAK)5ZZB5@0CWC() B4
预训练模型有提供下载的地址么?

@zhangjiajin
Copy link
Member

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants