diff --git a/speech/speech_recognition/rnnt/pytorch/README.md b/speech/speech_recognition/rnnt/pytorch/README.md index 24c012c79e6e84a586653901567afa23307174f9..3d3571ae084a7e415ad42b3b5606cb56151f9308 100644 --- a/speech/speech_recognition/rnnt/pytorch/README.md +++ b/speech/speech_recognition/rnnt/pytorch/README.md @@ -13,20 +13,24 @@ bash install.sh ## Step 2: Preparing datasets download LibriSpeech [http://www.openslr.org/12](http://www.openslr.org/12) ``` -bash scripts/download_librispeech.sh $DATASET_DIR +bash scripts/download_librispeech.sh ${DATA_ROOT_DIR} ``` preprocess LibriSpeech ``` -bash scripts/preprocess_librispeech.sh $DATASET_DIR +bash scripts/preprocess_librispeech.sh ${DATA_ROOT_DIR} ``` ## Step 3: Training +### Setup config yaml +```shell +sed -i "s#MODIFY_DATASET_DIR#${DATA_ROOT_DIR}/LibriSpeech#g" configs/baseline_v3-1023sp.yaml +``` ### Multiple GPUs on one machine ``` -cd scripts -bash train_rnnt_1x8.sh $OUTPUT_DIR $DATA_DIR +mkdir -p output/ +bash scripts/train_rnnt_1x8.sh output/ ${DATA_ROOT_DIR}/LibriSpeech ``` Following conditions were tested, you can run any of them below: @@ -46,4 +50,4 @@ Following conditions were tested, you can run any of them below: ## Reference -https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch \ No newline at end of file +https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch diff --git a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml index 92aef3093785fdf2f7c0165395a8814e10df1c29..08532c8b9f6603e81a547033152bffe219cdf109 100644 --- a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml +++ b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml @@ -17,7 +17,7 @@ # tokenizer: - sentpiece_model: /home/lin.wu/workspace/rnnt/datasets/sentencepieces/librispeech1023.model + sentpiece_model: MODIFY_DATASET_DIR/sentencepieces/librispeech1023.model labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/speech/speech_recognition/rnnt/pytorch/install.sh b/speech/speech_recognition/rnnt/pytorch/install.sh index 394596dfce4ccc711fb75b4a27a9a40e5ed34dc2..e74589c58b5e070df1e4dc81f00c26e5d5ce06ff 100755 --- a/speech/speech_recognition/rnnt/pytorch/install.sh +++ b/speech/speech_recognition/rnnt/pytorch/install.sh @@ -13,14 +13,32 @@ yum install -y epel-release yum install -y jq pip install -r requirements.txt -######## install warprnnt_pytorch -git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer +######## prepare env +# clean deps/ +rm -rf deps/ +mkdir -p deps/ +# download openmp-13.0.1.src.tar.xz +cd ./deps +wget "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.1/openmp-13.0.1.src.tar.xz" +tar -xvJf openmp-13.0.1.src.tar.xz && mv openmp-13.0.1.src openmp +cd openmp/ +mkdir build && cd build/ + +OPENMP_INSTALL_PREFIX=/usr/local/llvmopenmp +cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=${OPENMP_INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=On -DCMAKE_CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++ -DCMAKE_C_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/gcc ../ +make && make install + +cp ${OPENMP_INSTALL_PREFIX}/lib/libomp.so /opt/sw_home/local/lib64/libomp.so +cp ${OPENMP_INSTALL_PREFIX}/include/omp.h /opt/sw_home/local/lib64/clang/13.0.1/include/omp.h + +######## install warp-transducer +## back to deps/ +cd ../../ +git clone https://github.com/HawkAaron/warp-transducer COMMIT_SHA=f546575109111c455354861a0567c8aa794208a2 -cd deps/warp-transducer && git checkout $COMMIT_SHA +cd warp-transducer && git checkout $COMMIT_SHA mkdir build && cd build -######## solve lmp not find error -cp /opt/sw_home/local/lib64/libomp.so.1 /opt/sw_home/local/lib64/libomp.so export CUDA_HOME=/opt/sw_home/local/cuda export CC=/opt/sw_home/local/bin/clang export CXX=/opt/sw_home/local/bin/clang++ diff --git a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh index e92ec104061e3ec1922010ff52469bbd5feb4d0b..044cb5db0f719e24b9052431cadeb536f28a9d4c 100755 --- a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh +++ b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh @@ -23,4 +23,4 @@ set -a : ${NUM_GPUS:=1} : ${GRAD_ACCUMULATION_STEPS:=64} -bash ./scripts/train_rnnt_dist_1x8.sh "$@" +bash ./scripts/train_rnnt_1x8.sh "$@"