From 47390b8f53a32eebfe41424af2dc1bc1033786f3 Mon Sep 17 00:00:00 2001 From: "song.jian" Date: Mon, 5 Dec 2022 14:15:46 +0800 Subject: [PATCH 1/3] change batch_size from 512 to 256 Signed-off-by: song.jian --- cv/classification/resnest50/pytorch/train_resnest50_amp_dist.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cv/classification/resnest50/pytorch/train_resnest50_amp_dist.sh b/cv/classification/resnest50/pytorch/train_resnest50_amp_dist.sh index af76da2a5..3331e037e 100755 --- a/cv/classification/resnest50/pytorch/train_resnest50_amp_dist.sh +++ b/cv/classification/resnest50/pytorch/train_resnest50_amp_dist.sh @@ -26,5 +26,5 @@ fi cd ${ROOT_DIR} python3 $PYTHONARG ${ROOT_DIR}/run_train.py \ --model resnest50 --dali --dali-cpu --data-path $DATA_PATH \ - --opt fused_sgd --batch-size 512 --lr 0.0125 \ + --opt fused_sgd --batch-size 256 --lr 0.0125 \ --amp --nhwc "$@" -- Gitee From a4bedf6a41c074597716de5fb3c60414958fc28d Mon Sep 17 00:00:00 2001 From: "song.jian" Date: Mon, 5 Dec 2022 17:03:36 +0800 Subject: [PATCH 2/3] update script_name amp_4cards.sh amp_8cards.sh fp32_4cards.sh fp32_8cards.sh fp32_16cards.sh Signed-off-by: song.jian --- cv/classification/resnet50/pytorch/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cv/classification/resnet50/pytorch/README.md b/cv/classification/resnet50/pytorch/README.md index c388ce51c..85bd0f571 100644 --- a/cv/classification/resnet50/pytorch/README.md +++ b/cv/classification/resnet50/pytorch/README.md @@ -21,17 +21,17 @@ bash amp_1card.sh --data-path /path/to/imagenet ``` ### Multiple GPUs on one machine ```bash -bash fp32_4card.sh --data-path /path/to/imagenet -bash fp32_8card.sh --data-path /path/to/imagenet +bash fp32_4cards.sh --data-path /path/to/imagenet +bash fp32_8cards.sh --data-path /path/to/imagenet ``` ### Multiple GPUs on one machine (AMP) ```bash -bash amp_4card.sh --data-path /path/to/imagenet -bash amp_8card.sh --data-path /path/to/imagenet 
+bash amp_4cards.sh --data-path /path/to/imagenet +bash amp_8cards.sh --data-path /path/to/imagenet ``` ### Multiple GPUs on two machines ```bash -bash fp32_16card.sh --data-path /path/to/imagenet +bash fp32_16cards.sh --data-path /path/to/imagenet ``` ## Results on BI-V100 -- Gitee From 699a559d92447852ef2aff06f69353ad620cf41f Mon Sep 17 00:00:00 2001 From: "song.jian" Date: Mon, 5 Dec 2022 17:27:25 +0800 Subject: [PATCH 3/3] README: replace train_dist.sh with dist_train.sh; add step: apt install dos2unix Signed-off-by: song.jian --- cv/detection/autoassign/pytorch/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cv/detection/autoassign/pytorch/README.md b/cv/detection/autoassign/pytorch/README.md index 3037ca1ca..02a110b62 100755 --- a/cv/detection/autoassign/pytorch/README.md +++ b/cv/detection/autoassign/pytorch/README.md @@ -17,6 +17,7 @@ $ MMCV_WITH_OPS=1 python3 setup.py build && cp build/lib.linux*/mmcv/_ext.cpytho ```bash $ cd /path/to/modelzoo/cv/detection/autoassign/pytorch $ mkdir -p data && cd data +$ apt install dos2unix # Download from homepage of coco: https://cocodataset.org/ ``` @@ -30,7 +31,7 @@ $ python3 train.py [training args] # config file can be found in ### Multiple GPUs on one machine ```bash -$ bash train_dist.sh [training args] # config file can be found in the configs directory +$ bash dist_train.sh [training args] # config file can be found in the configs directory ``` ## Reference -- Gitee