diff --git a/nlp/text_classification/bert/pytorch/README.md b/nlp/text_classification/bert/pytorch/README.md
index b5576feb218b77fd6520a3e261fa85cddce253b7..14e65cc84db49b4f212b7e4546877546ae63a29f 100644
--- a/nlp/text_classification/bert/pytorch/README.md
+++ b/nlp/text_classification/bert/pytorch/README.md
@@ -45,7 +45,11 @@ git clone https://huggingface.co/bert-base-uncased
 bash train.sh
 
 # Multiple GPUs on one machine
+## torch 1.x
 bash train_dist.sh
+
+## torch 2.x
+bash train_dist_torch2.sh
 ```
 
 ## Model Results
diff --git a/nlp/text_classification/bert/pytorch/requirements.txt b/nlp/text_classification/bert/pytorch/requirements.txt
index 71dc11dd1a05b9fe065922b130c788271585b98d..844d859247d4a263c2013293285dc911d16c3afa 100644
--- a/nlp/text_classification/bert/pytorch/requirements.txt
+++ b/nlp/text_classification/bert/pytorch/requirements.txt
@@ -1,5 +1,5 @@
 accelerate >= 0.12.0
-datasets >= 1.8.0
+datasets == 2.14.6
 sentencepiece != 0.1.92
 scipy
 scikit-learn
@@ -8,3 +8,4 @@ protobuf
 numpy == 1.21.6
 evaluate == 0.4.1
 transformers == 4.30.2
+pyarrow < 13.0.0
diff --git a/nlp/text_classification/bert/pytorch/train.sh b/nlp/text_classification/bert/pytorch/train.sh
index abedf907cfd945c7ff75c4f6522f933e6c148500..895f28ecc3aade133b5af917edba45479d5aa5c1 100644
--- a/nlp/text_classification/bert/pytorch/train.sh
+++ b/nlp/text_classification/bert/pytorch/train.sh
@@ -16,7 +16,7 @@
 export TASK_NAME=WNLI
 
 python3 run_glue.py \
-  --model_name_or_path ./bert-base-cased \
+  --model_name_or_path ./bert-base-uncased \
   --task_name $TASK_NAME \
   --do_train \
   --do_eval \
diff --git a/nlp/text_classification/bert/pytorch/train_dist.sh b/nlp/text_classification/bert/pytorch/train_dist.sh
index 05794aa34afb1a25dca508df2fbce63df8cfbd4b..e6b30f7f08a731cc8acc7c0e459f0157b3ab03b8 100644
--- a/nlp/text_classification/bert/pytorch/train_dist.sh
+++ b/nlp/text_classification/bert/pytorch/train_dist.sh
@@ -17,7 +17,7 @@
 export TASK_NAME=WNLI
 python3  -m torch.distributed.launch --nproc_per_node=8 --master_port 12333 \
   run_glue.py \
-  --model_name_or_path ./bert-base-cased \
+  --model_name_or_path ./bert-base-uncased \
   --task_name $TASK_NAME \
   --do_train \
   --do_eval \
diff --git a/nlp/text_classification/bert/pytorch/train_dist_torch2.sh b/nlp/text_classification/bert/pytorch/train_dist_torch2.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2ea3cd1ce3443b1625bd82f4afec13dccc900ea0
--- /dev/null
+++ b/nlp/text_classification/bert/pytorch/train_dist_torch2.sh
@@ -0,0 +1,28 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+# torch 2.x launcher: run run_glue.py on the WNLI task with 8 processes per node via torchrun.
+export TASK_NAME=WNLI
+torchrun --nproc_per_node=8 --master_port 12333 \
+  run_glue.py \
+  --model_name_or_path ./bert-base-uncased \
+  --task_name "$TASK_NAME" \
+  --do_train \
+  --do_eval \
+  --max_seq_length 128 \
+  --per_device_train_batch_size 32 \
+  --learning_rate 2e-5 \
+  --num_train_epochs 5 \
+  --output_dir "/tmp/$TASK_NAME/"